step 10400

Files changed:
- README.md +0 -2
- model.safetensors +1 -1
- trainer_state.json +1756 -4
README.md
CHANGED
@@ -12,8 +12,6 @@ license: apache-2.0
 
 # BEE-spoke-data/bert-plus-L8-v1.0-allNLI_matryoshka
 
-> if this message is here, this model card is a copy of the one from the _base model_ and not the actual model card.
-
 This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search.
 
 - this was finetuned at 512 ctx (allNLI is all short-ctx examples) but the base model supports 4096
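The model card above describes a standard sentence-transformers encoder that maps text to 768-dimensional vectors for clustering or semantic search. A minimal usage sketch, assuming the checkpoint is published under the repo id shown in the README heading (the example sentences are made up):

```python
# Minimal sketch, not part of this commit. Requires:
#   pip install sentence-transformers
from sentence_transformers import SentenceTransformer

# Repo id taken from the README heading above.
model = SentenceTransformer("BEE-spoke-data/bert-plus-L8-v1.0-allNLI_matryoshka")

sentences = ["A man is eating food.", "Someone is having a meal."]
embeddings = model.encode(sentences)
print(embeddings.shape)  # (2, 768): one 768-dimensional dense vector per sentence
```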
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1395c5c36e20564cc37941ccb71e32f9fbf1039cc0aa1f5550d04df07dcab4f0
 size 352324400
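The model.safetensors entry above is a Git LFS pointer rather than the weights themselves: the repo tracks only an `oid` (the SHA-256 of the real file) and its `size` in bytes. A minimal sketch of checking a downloaded file against this pointer (the local path is hypothetical):

```python
# Minimal sketch: verify a downloaded model.safetensors against the LFS
# pointer fields shown in the diff above. The local path is hypothetical.
import hashlib

EXPECTED_OID = "1395c5c36e20564cc37941ccb71e32f9fbf1039cc0aa1f5550d04df07dcab4f0"
EXPECTED_SIZE = 352324400  # bytes, from the "size" line of the pointer

def verify(path: str) -> bool:
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
            size += len(chunk)
    return size == EXPECTED_SIZE and h.hexdigest() == EXPECTED_OID

print(verify("model.safetensors"))
```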
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.
-  "best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-07_22-56/checkpoint-
-  "epoch": 0.
+  "best_metric": 0.9605806103224412,
+  "best_model_checkpoint": "checkpoints/BEE-spoke-data-bert-plus-L8-v1.0-allNLI_matryoshka-synthetic-text-similarity-Mar-07_22-56/checkpoint-9600",
+  "epoch": 0.8361809045226131,
   "eval_steps": 300,
-  "global_step":
+  "global_step": 10400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -13427,6 +13427,1758 @@
       "learning_rate": 5.7839721254355405e-06,
       "loss": 0.0143,
       "step": 9200
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.21646694839000702,
+      "learning_rate": 5.7750379701599224e-06,
+      "loss": 0.0117,
+      "step": 9205
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.2780154347419739,
+      "learning_rate": 5.766103814884303e-06,
+      "loss": 0.0069,
+      "step": 9210
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.38007307052612305,
+      "learning_rate": 5.7571696596086845e-06,
+      "loss": 0.0071,
+      "step": 9215
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.23504765331745148,
+      "learning_rate": 5.748235504333066e-06,
+      "loss": 0.0073,
+      "step": 9220
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.3849126100540161,
+      "learning_rate": 5.739301349057447e-06,
+      "loss": 0.0062,
+      "step": 9225
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.39096078276634216,
+      "learning_rate": 5.7303671937818285e-06,
+      "loss": 0.0081,
+      "step": 9230
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.35881391167640686,
+      "learning_rate": 5.72143303850621e-06,
+      "loss": 0.0073,
+      "step": 9235
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.2720240652561188,
+      "learning_rate": 5.7124988832305905e-06,
+      "loss": 0.006,
+      "step": 9240
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.17227132618427277,
+      "learning_rate": 5.7035647279549724e-06,
+      "loss": 0.0073,
+      "step": 9245
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.4603612422943115,
+      "learning_rate": 5.6946305726793535e-06,
+      "loss": 0.0116,
+      "step": 9250
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.6847078204154968,
+      "learning_rate": 5.685696417403735e-06,
+      "loss": 0.0075,
+      "step": 9255
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.41257572174072266,
+      "learning_rate": 5.676762262128116e-06,
+      "loss": 0.0103,
+      "step": 9260
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 0.402539998292923,
+      "learning_rate": 5.6678281068524974e-06,
+      "loss": 0.0063,
+      "step": 9265
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.4992349147796631,
+      "learning_rate": 5.658893951576879e-06,
+      "loss": 0.0103,
+      "step": 9270
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.47921329736709595,
+      "learning_rate": 5.6499597963012595e-06,
+      "loss": 0.0082,
+      "step": 9275
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.2835437059402466,
+      "learning_rate": 5.641025641025641e-06,
+      "loss": 0.0085,
+      "step": 9280
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.23391133546829224,
+      "learning_rate": 5.632091485750023e-06,
+      "loss": 0.0078,
+      "step": 9285
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.3029947876930237,
+      "learning_rate": 5.6231573304744035e-06,
+      "loss": 0.0145,
+      "step": 9290
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.26060959696769714,
+      "learning_rate": 5.614223175198785e-06,
+      "loss": 0.0061,
+      "step": 9295
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.2901848554611206,
+      "learning_rate": 5.605289019923167e-06,
+      "loss": 0.0083,
+      "step": 9300
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 0.009584016166627407,
+      "eval_pearson_cosine": 0.9615300255520981,
+      "eval_pearson_dot": 0.9604189728759469,
+      "eval_pearson_euclidean": 0.9517997399844298,
+      "eval_pearson_manhattan": 0.9500464009872014,
+      "eval_pearson_max": 0.9615300255520981,
+      "eval_runtime": 425.5009,
+      "eval_samples_per_second": 1.175,
+      "eval_spearman_cosine": 0.9606140184560736,
+      "eval_spearman_dot": 0.9573194772779091,
+      "eval_spearman_euclidean": 0.9593890535562142,
+      "eval_spearman_manhattan": 0.9579064236256946,
+      "eval_spearman_max": 0.9606140184560736,
+      "eval_steps_per_second": 1.175,
+      "step": 9300
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.4470210373401642,
+      "learning_rate": 5.596354864647548e-06,
+      "loss": 0.0129,
+      "step": 9305
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.4185398519039154,
+      "learning_rate": 5.587420709371929e-06,
+      "loss": 0.0081,
+      "step": 9310
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.6620985269546509,
+      "learning_rate": 5.57848655409631e-06,
+      "loss": 0.0092,
+      "step": 9315
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.5238575339317322,
+      "learning_rate": 5.569552398820692e-06,
+      "loss": 0.0106,
+      "step": 9320
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.8626998662948608,
+      "learning_rate": 5.5606182435450724e-06,
+      "loss": 0.0149,
+      "step": 9325
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.312152624130249,
+      "learning_rate": 5.551684088269454e-06,
+      "loss": 0.0085,
+      "step": 9330
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.2844025194644928,
+      "learning_rate": 5.542749932993836e-06,
+      "loss": 0.01,
+      "step": 9335
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.4623136520385742,
+      "learning_rate": 5.533815777718216e-06,
+      "loss": 0.0082,
+      "step": 9340
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.34604260325431824,
+      "learning_rate": 5.524881622442598e-06,
+      "loss": 0.0105,
+      "step": 9345
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.5630137920379639,
+      "learning_rate": 5.51594746716698e-06,
+      "loss": 0.008,
+      "step": 9350
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.22408334910869598,
+      "learning_rate": 5.507013311891361e-06,
+      "loss": 0.0086,
+      "step": 9355
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.3562946915626526,
+      "learning_rate": 5.498079156615742e-06,
+      "loss": 0.0077,
+      "step": 9360
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.6519134640693665,
+      "learning_rate": 5.489145001340124e-06,
+      "loss": 0.0273,
+      "step": 9365
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.3609578609466553,
+      "learning_rate": 5.480210846064505e-06,
+      "loss": 0.008,
+      "step": 9370
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.4362548291683197,
+      "learning_rate": 5.471276690788886e-06,
+      "loss": 0.0156,
+      "step": 9375
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.51788729429245,
+      "learning_rate": 5.462342535513267e-06,
+      "loss": 0.0143,
+      "step": 9380
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.3049197793006897,
+      "learning_rate": 5.453408380237649e-06,
+      "loss": 0.0084,
+      "step": 9385
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.27513086795806885,
+      "learning_rate": 5.444474224962031e-06,
+      "loss": 0.0089,
+      "step": 9390
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.5995715260505676,
+      "learning_rate": 5.435540069686411e-06,
+      "loss": 0.0176,
+      "step": 9395
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.4572034180164337,
+      "learning_rate": 5.426605914410793e-06,
+      "loss": 0.0079,
+      "step": 9400
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.38861045241355896,
+      "learning_rate": 5.417671759135175e-06,
+      "loss": 0.0093,
+      "step": 9405
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.2802937924861908,
+      "learning_rate": 5.408737603859555e-06,
+      "loss": 0.0099,
+      "step": 9410
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.2714308500289917,
+      "learning_rate": 5.399803448583937e-06,
+      "loss": 0.0073,
+      "step": 9415
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.4080353379249573,
+      "learning_rate": 5.390869293308318e-06,
+      "loss": 0.0069,
+      "step": 9420
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.22111305594444275,
+      "learning_rate": 5.381935138032699e-06,
+      "loss": 0.0058,
+      "step": 9425
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.7781515717506409,
+      "learning_rate": 5.37300098275708e-06,
+      "loss": 0.0066,
+      "step": 9430
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.3610839545726776,
+      "learning_rate": 5.364066827481462e-06,
+      "loss": 0.0088,
+      "step": 9435
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.3884970247745514,
+      "learning_rate": 5.355132672205844e-06,
+      "loss": 0.0094,
+      "step": 9440
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.5556257963180542,
+      "learning_rate": 5.346198516930224e-06,
+      "loss": 0.0105,
+      "step": 9445
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.5632768273353577,
+      "learning_rate": 5.337264361654606e-06,
+      "loss": 0.0091,
+      "step": 9450
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.6423171758651733,
+      "learning_rate": 5.328330206378988e-06,
+      "loss": 0.0079,
+      "step": 9455
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.5519477725028992,
+      "learning_rate": 5.319396051103368e-06,
+      "loss": 0.0192,
+      "step": 9460
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.5822392106056213,
+      "learning_rate": 5.31046189582775e-06,
+      "loss": 0.0123,
+      "step": 9465
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.49340376257896423,
+      "learning_rate": 5.301527740552132e-06,
+      "loss": 0.0083,
+      "step": 9470
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.3505455553531647,
+      "learning_rate": 5.292593585276512e-06,
+      "loss": 0.011,
+      "step": 9475
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.7089307308197021,
+      "learning_rate": 5.283659430000894e-06,
+      "loss": 0.0079,
+      "step": 9480
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.791226863861084,
+      "learning_rate": 5.274725274725275e-06,
+      "loss": 0.0124,
+      "step": 9485
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.38624152541160583,
+      "learning_rate": 5.265791119449657e-06,
+      "loss": 0.0076,
+      "step": 9490
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.2802993953227997,
+      "learning_rate": 5.256856964174037e-06,
+      "loss": 0.0183,
+      "step": 9495
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.4529344439506531,
+      "learning_rate": 5.247922808898419e-06,
+      "loss": 0.0112,
+      "step": 9500
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 1.167479395866394,
+      "learning_rate": 5.238988653622801e-06,
+      "loss": 0.0128,
+      "step": 9505
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 0.2423386126756668,
+      "learning_rate": 5.230054498347181e-06,
+      "loss": 0.0093,
+      "step": 9510
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.3792467713356018,
+      "learning_rate": 5.221120343071563e-06,
+      "loss": 0.0078,
+      "step": 9515
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.47463178634643555,
+      "learning_rate": 5.212186187795945e-06,
+      "loss": 0.0116,
+      "step": 9520
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.9293251037597656,
+      "learning_rate": 5.203252032520326e-06,
+      "loss": 0.0107,
+      "step": 9525
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 41.848262786865234,
+      "learning_rate": 5.194317877244707e-06,
+      "loss": 0.0182,
+      "step": 9530
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.34961816668510437,
+      "learning_rate": 5.185383721969088e-06,
+      "loss": 0.007,
+      "step": 9535
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 1.822770595550537,
+      "learning_rate": 5.17644956669347e-06,
+      "loss": 0.0151,
+      "step": 9540
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.3188020884990692,
+      "learning_rate": 5.167515411417851e-06,
+      "loss": 0.0067,
+      "step": 9545
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.43374982476234436,
+      "learning_rate": 5.158581256142232e-06,
+      "loss": 0.0084,
+      "step": 9550
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.4171868562698364,
+      "learning_rate": 5.149647100866614e-06,
+      "loss": 0.0073,
+      "step": 9555
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.5583686232566833,
+      "learning_rate": 5.140712945590994e-06,
+      "loss": 0.0092,
+      "step": 9560
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.24175512790679932,
+      "learning_rate": 5.131778790315376e-06,
+      "loss": 0.0081,
+      "step": 9565
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.3415161371231079,
+      "learning_rate": 5.122844635039758e-06,
+      "loss": 0.0109,
+      "step": 9570
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.5684280395507812,
+      "learning_rate": 5.11391047976414e-06,
+      "loss": 0.0072,
+      "step": 9575
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.9785246849060059,
+      "learning_rate": 5.10497632448852e-06,
+      "loss": 0.0098,
+      "step": 9580
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.2952157258987427,
+      "learning_rate": 5.096042169212902e-06,
+      "loss": 0.008,
+      "step": 9585
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.20075848698616028,
+      "learning_rate": 5.087108013937283e-06,
+      "loss": 0.0074,
+      "step": 9590
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.3542312681674957,
+      "learning_rate": 5.078173858661664e-06,
+      "loss": 0.0109,
+      "step": 9595
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.38026684522628784,
+      "learning_rate": 5.069239703386045e-06,
+      "loss": 0.0069,
+      "step": 9600
+    },
+    {
+      "epoch": 0.77,
+      "eval_loss": 0.008200183510780334,
+      "eval_pearson_cosine": 0.9642878383068713,
+      "eval_pearson_dot": 0.9627028886298191,
+      "eval_pearson_euclidean": 0.9523746354427236,
+      "eval_pearson_manhattan": 0.9507404567548948,
+      "eval_pearson_max": 0.9642878383068713,
+      "eval_runtime": 425.6137,
+      "eval_samples_per_second": 1.175,
+      "eval_spearman_cosine": 0.9605806103224412,
+      "eval_spearman_dot": 0.955501806007224,
+      "eval_spearman_euclidean": 0.9597567350269401,
+      "eval_spearman_manhattan": 0.9581544886179545,
+      "eval_spearman_max": 0.9605806103224412,
+      "eval_steps_per_second": 1.175,
+      "step": 9600
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.37074190378189087,
+      "learning_rate": 5.060305548110427e-06,
+      "loss": 0.0173,
+      "step": 9605
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.3430071175098419,
+      "learning_rate": 5.051371392834808e-06,
+      "loss": 0.0077,
+      "step": 9610
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.516249418258667,
+      "learning_rate": 5.042437237559189e-06,
+      "loss": 0.0059,
+      "step": 9615
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.3532971143722534,
+      "learning_rate": 5.033503082283571e-06,
+      "loss": 0.0113,
+      "step": 9620
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.4686454236507416,
+      "learning_rate": 5.0245689270079526e-06,
+      "loss": 0.0097,
+      "step": 9625
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.2624160051345825,
+      "learning_rate": 5.015634771732333e-06,
+      "loss": 0.0087,
+      "step": 9630
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 0.35835760831832886,
+      "learning_rate": 5.006700616456715e-06,
+      "loss": 0.0076,
+      "step": 9635
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.6053426265716553,
+      "learning_rate": 4.997766461181096e-06,
+      "loss": 0.0158,
+      "step": 9640
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.399104505777359,
+      "learning_rate": 4.9888323059054776e-06,
+      "loss": 0.0085,
+      "step": 9645
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.3674059212207794,
+      "learning_rate": 4.979898150629859e-06,
+      "loss": 0.0112,
+      "step": 9650
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.16110941767692566,
+      "learning_rate": 4.97096399535424e-06,
+      "loss": 0.0086,
+      "step": 9655
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.2616519331932068,
+      "learning_rate": 4.962029840078621e-06,
+      "loss": 0.009,
+      "step": 9660
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.6079025268554688,
+      "learning_rate": 4.953095684803002e-06,
+      "loss": 0.0082,
+      "step": 9665
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.30316200852394104,
+      "learning_rate": 4.944161529527384e-06,
+      "loss": 0.0063,
+      "step": 9670
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.36754128336906433,
+      "learning_rate": 4.935227374251765e-06,
+      "loss": 0.0073,
+      "step": 9675
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.6272875666618347,
+      "learning_rate": 4.9262932189761465e-06,
+      "loss": 0.0089,
+      "step": 9680
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.5529404878616333,
+      "learning_rate": 4.9173590637005276e-06,
+      "loss": 0.0095,
+      "step": 9685
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.2829398214817047,
+      "learning_rate": 4.908424908424909e-06,
+      "loss": 0.011,
+      "step": 9690
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.30931296944618225,
+      "learning_rate": 4.8994907531492905e-06,
+      "loss": 0.0103,
+      "step": 9695
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.6834177374839783,
+      "learning_rate": 4.8905565978736715e-06,
+      "loss": 0.0087,
+      "step": 9700
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.42869821190834045,
+      "learning_rate": 4.8816224425980526e-06,
+      "loss": 0.0088,
+      "step": 9705
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.3291241228580475,
+      "learning_rate": 4.8726882873224344e-06,
+      "loss": 0.0061,
+      "step": 9710
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.3477993607521057,
+      "learning_rate": 4.8637541320468155e-06,
+      "loss": 0.007,
+      "step": 9715
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.393031507730484,
+      "learning_rate": 4.8548199767711965e-06,
+      "loss": 0.0077,
+      "step": 9720
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.37427353858947754,
+      "learning_rate": 4.8458858214955776e-06,
+      "loss": 0.0086,
+      "step": 9725
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.4370558261871338,
+      "learning_rate": 4.8369516662199594e-06,
+      "loss": 0.0064,
+      "step": 9730
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.31545019149780273,
+      "learning_rate": 4.8280175109443405e-06,
+      "loss": 0.0059,
+      "step": 9735
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.7945960760116577,
+      "learning_rate": 4.8190833556687215e-06,
+      "loss": 0.021,
+      "step": 9740
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.29888418316841125,
+      "learning_rate": 4.810149200393103e-06,
+      "loss": 0.0062,
+      "step": 9745
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 3.3094396591186523,
+      "learning_rate": 4.8012150451174844e-06,
+      "loss": 0.0154,
+      "step": 9750
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.2018340677022934,
+      "learning_rate": 4.792280889841866e-06,
+      "loss": 0.0074,
+      "step": 9755
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 0.7358143329620361,
+      "learning_rate": 4.783346734566247e-06,
+      "loss": 0.0154,
+      "step": 9760
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.580872654914856,
+      "learning_rate": 4.774412579290628e-06,
+      "loss": 0.0111,
+      "step": 9765
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.3042278289794922,
+      "learning_rate": 4.7654784240150095e-06,
+      "loss": 0.0078,
+      "step": 9770
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.42176923155784607,
+      "learning_rate": 4.7565442687393905e-06,
+      "loss": 0.0062,
+      "step": 9775
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.30140987038612366,
+      "learning_rate": 4.747610113463772e-06,
+      "loss": 0.0063,
+      "step": 9780
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.5304137468338013,
+      "learning_rate": 4.738675958188153e-06,
+      "loss": 0.0097,
+      "step": 9785
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.36996015906333923,
+      "learning_rate": 4.729741802912535e-06,
+      "loss": 0.0097,
+      "step": 9790
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.6667109131813049,
+      "learning_rate": 4.720807647636916e-06,
+      "loss": 0.0075,
+      "step": 9795
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.2762182950973511,
+      "learning_rate": 4.711873492361297e-06,
+      "loss": 0.0064,
+      "step": 9800
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.3733229339122772,
+      "learning_rate": 4.702939337085679e-06,
+      "loss": 0.0054,
+      "step": 9805
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.3811498284339905,
+      "learning_rate": 4.69400518181006e-06,
+      "loss": 0.0062,
+      "step": 9810
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.5551919937133789,
+      "learning_rate": 4.685071026534442e-06,
+      "loss": 0.0089,
+      "step": 9815
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.5745194554328918,
+      "learning_rate": 4.676136871258823e-06,
+      "loss": 0.0097,
+      "step": 9820
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.2763228118419647,
+      "learning_rate": 4.667202715983204e-06,
+      "loss": 0.01,
+      "step": 9825
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.5403454899787903,
+      "learning_rate": 4.658268560707585e-06,
+      "loss": 0.0134,
+      "step": 9830
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.37194764614105225,
+      "learning_rate": 4.649334405431966e-06,
+      "loss": 0.0051,
+      "step": 9835
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.21742063760757446,
+      "learning_rate": 4.640400250156348e-06,
+      "loss": 0.0123,
+      "step": 9840
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.3551539182662964,
+      "learning_rate": 4.631466094880729e-06,
+      "loss": 0.0104,
+      "step": 9845
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.248150035738945,
+      "learning_rate": 4.62253193960511e-06,
+      "loss": 0.0054,
+      "step": 9850
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.6017441153526306,
+      "learning_rate": 4.613597784329492e-06,
+      "loss": 0.0115,
+      "step": 9855
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.46963444352149963,
+      "learning_rate": 4.604663629053873e-06,
+      "loss": 0.0087,
+      "step": 9860
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.26475605368614197,
+      "learning_rate": 4.595729473778255e-06,
+      "loss": 0.0064,
+      "step": 9865
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.3036366403102875,
+      "learning_rate": 4.586795318502636e-06,
+      "loss": 0.0094,
+      "step": 9870
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.4167456328868866,
+      "learning_rate": 4.577861163227017e-06,
+      "loss": 0.008,
+      "step": 9875
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.27215877175331116,
+      "learning_rate": 4.568927007951398e-06,
+      "loss": 0.0087,
+      "step": 9880
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 0.3947705924510956,
+      "learning_rate": 4.55999285267578e-06,
+      "loss": 0.0099,
+      "step": 9885
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.261850506067276,
+      "learning_rate": 4.551058697400161e-06,
+      "loss": 0.012,
+      "step": 9890
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.27852803468704224,
+      "learning_rate": 4.542124542124542e-06,
+      "loss": 0.0063,
+      "step": 9895
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.3846147060394287,
+      "learning_rate": 4.533190386848923e-06,
+      "loss": 0.0072,
+      "step": 9900
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.007968730293214321,
+      "eval_pearson_cosine": 0.9658888665742654,
+      "eval_pearson_dot": 0.9627392628207877,
+      "eval_pearson_euclidean": 0.9531911193892233,
+      "eval_pearson_manhattan": 0.9513587882321657,
+      "eval_pearson_max": 0.9658888665742654,
+      "eval_runtime": 425.901,
+      "eval_samples_per_second": 1.174,
+      "eval_spearman_cosine": 0.9624986019944078,
+      "eval_spearman_dot": 0.955504398017592,
+      "eval_spearman_euclidean": 0.9630360121440484,
+      "eval_spearman_manhattan": 0.9610320041280165,
+      "eval_spearman_max": 0.9630360121440484,
+      "eval_steps_per_second": 1.174,
+      "step": 9900
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.45619475841522217,
+      "learning_rate": 4.524256231573305e-06,
+      "loss": 0.009,
+      "step": 9905
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.39344677329063416,
+      "learning_rate": 4.515322076297686e-06,
+      "loss": 0.0088,
+      "step": 9910
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.18160255253314972,
+      "learning_rate": 4.506387921022068e-06,
+      "loss": 0.0059,
+      "step": 9915
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.19619829952716827,
+      "learning_rate": 4.497453765746449e-06,
+      "loss": 0.0081,
+      "step": 9920
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.2846349775791168,
+      "learning_rate": 4.488519610470831e-06,
+      "loss": 0.0049,
+      "step": 9925
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.3724232316017151,
+      "learning_rate": 4.479585455195212e-06,
+      "loss": 0.0118,
+      "step": 9930
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.3385705351829529,
+      "learning_rate": 4.470651299919593e-06,
+      "loss": 0.0069,
+      "step": 9935
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.2874641418457031,
+      "learning_rate": 4.461717144643974e-06,
+      "loss": 0.006,
+      "step": 9940
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.3358646035194397,
+      "learning_rate": 4.452782989368355e-06,
+      "loss": 0.0062,
+      "step": 9945
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.7037550806999207,
+      "learning_rate": 4.443848834092737e-06,
+      "loss": 0.009,
+      "step": 9950
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.20332568883895874,
+      "learning_rate": 4.434914678817118e-06,
+      "loss": 0.0084,
+      "step": 9955
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.36842986941337585,
+      "learning_rate": 4.425980523541499e-06,
+      "loss": 0.0087,
+      "step": 9960
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.283997505903244,
+      "learning_rate": 4.417046368265881e-06,
+      "loss": 0.0086,
+      "step": 9965
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.42346441745758057,
+      "learning_rate": 4.408112212990262e-06,
+      "loss": 0.0078,
+      "step": 9970
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.18222256004810333,
+      "learning_rate": 4.399178057714644e-06,
+      "loss": 0.0074,
+      "step": 9975
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.6875673532485962,
+      "learning_rate": 4.390243902439025e-06,
+      "loss": 0.0102,
+      "step": 9980
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.24472391605377197,
+      "learning_rate": 4.381309747163406e-06,
+      "loss": 0.0065,
+      "step": 9985
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.26704928278923035,
+      "learning_rate": 4.372375591887788e-06,
+      "loss": 0.0083,
+      "step": 9990
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.5033184289932251,
+      "learning_rate": 4.363441436612169e-06,
+      "loss": 0.0067,
+      "step": 9995
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.781326413154602,
+      "learning_rate": 4.35450728133655e-06,
+      "loss": 0.006,
+      "step": 10000
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.46775344014167786,
+      "learning_rate": 4.345573126060931e-06,
+      "loss": 0.0087,
+      "step": 10005
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.3803477883338928,
+      "learning_rate": 4.336638970785312e-06,
+      "loss": 0.0098,
+      "step": 10010
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.23086823523044586,
+      "learning_rate": 4.327704815509694e-06,
+      "loss": 0.0089,
+      "step": 10015
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.18261872231960297,
+      "learning_rate": 4.318770660234075e-06,
+      "loss": 0.01,
+      "step": 10020
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.33674633502960205,
+      "learning_rate": 4.309836504958457e-06,
+      "loss": 0.0086,
+      "step": 10025
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.29859867691993713,
+      "learning_rate": 4.300902349682838e-06,
+      "loss": 0.009,
+      "step": 10030
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.40897712111473083,
+      "learning_rate": 4.291968194407219e-06,
+      "loss": 0.0092,
+      "step": 10035
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.7651856541633606,
+      "learning_rate": 4.283034039131601e-06,
+      "loss": 0.0108,
+      "step": 10040
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.7065618634223938,
+      "learning_rate": 4.274099883855982e-06,
+      "loss": 0.0132,
+      "step": 10045
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.4190121293067932,
+      "learning_rate": 4.265165728580363e-06,
+      "loss": 0.0063,
+      "step": 10050
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.47081393003463745,
+      "learning_rate": 4.256231573304745e-06,
+      "loss": 0.0065,
+      "step": 10055
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.3828545808792114,
+      "learning_rate": 4.247297418029126e-06,
+      "loss": 0.0087,
+      "step": 10060
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.7525375485420227,
+      "learning_rate": 4.238363262753507e-06,
+      "loss": 0.0113,
+      "step": 10065
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.34589239954948425,
+      "learning_rate": 4.229429107477888e-06,
+      "loss": 0.0078,
+      "step": 10070
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.5909443497657776,
+      "learning_rate": 4.22049495220227e-06,
+      "loss": 0.007,
+      "step": 10075
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.3668850362300873,
+      "learning_rate": 4.211560796926651e-06,
+      "loss": 0.0061,
+      "step": 10080
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.4989503026008606,
+      "learning_rate": 4.202626641651033e-06,
+      "loss": 0.0071,
+      "step": 10085
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.44953587651252747,
+      "learning_rate": 4.193692486375414e-06,
+      "loss": 0.0079,
+      "step": 10090
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.36224547028541565,
+      "learning_rate": 4.184758331099795e-06,
+      "loss": 0.0101,
+      "step": 10095
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.21505020558834076,
+      "learning_rate": 4.175824175824177e-06,
+      "loss": 0.0116,
+      "step": 10100
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.5628384351730347,
+      "learning_rate": 4.166890020548558e-06,
+      "loss": 0.0106,
+      "step": 10105
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.6173145174980164,
+      "learning_rate": 4.157955865272939e-06,
+      "loss": 0.0074,
+      "step": 10110
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.20209026336669922,
+      "learning_rate": 4.14902170999732e-06,
+      "loss": 0.0054,
+      "step": 10115
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.26845335960388184,
+      "learning_rate": 4.140087554721701e-06,
+      "loss": 0.0084,
+      "step": 10120
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.19294553995132446,
+      "learning_rate": 4.131153399446083e-06,
+      "loss": 0.0069,
+      "step": 10125
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.2686295211315155,
+      "learning_rate": 4.122219244170464e-06,
+      "loss": 0.0062,
+      "step": 10130
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 0.34871765971183777,
+      "learning_rate": 4.113285088894846e-06,
+      "loss": 0.0087,
+      "step": 10135
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.3453786373138428,
+      "learning_rate": 4.104350933619227e-06,
+      "loss": 0.008,
+      "step": 10140
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.4701385200023651,
+      "learning_rate": 4.095416778343608e-06,
+      "loss": 0.0077,
+      "step": 10145
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.13584518432617188,
+      "learning_rate": 4.0864826230679896e-06,
+      "loss": 0.0063,
+      "step": 10150
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.5030553936958313,
+      "learning_rate": 4.077548467792371e-06,
+      "loss": 0.0076,
+      "step": 10155
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.44598788022994995,
+      "learning_rate": 4.0686143125167525e-06,
+      "loss": 0.0067,
+      "step": 10160
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.2886448800563812,
+      "learning_rate": 4.0596801572411335e-06,
+      "loss": 0.0077,
+      "step": 10165
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.2822360098361969,
+      "learning_rate": 4.0507460019655146e-06,
+      "loss": 0.0078,
+      "step": 10170
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.5592710375785828,
+      "learning_rate": 4.041811846689896e-06,
+      "loss": 0.0076,
+      "step": 10175
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.310092031955719,
+      "learning_rate": 4.032877691414277e-06,
+      "loss": 0.0107,
+      "step": 10180
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.3115810751914978,
+      "learning_rate": 4.0239435361386585e-06,
+      "loss": 0.0087,
+      "step": 10185
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.9056434035301208,
+      "learning_rate": 4.0150093808630396e-06,
+      "loss": 0.0099,
+      "step": 10190
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.31844180822372437,
+      "learning_rate": 4.006075225587421e-06,
+      "loss": 0.0111,
+      "step": 10195
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.5329269766807556,
+      "learning_rate": 3.9971410703118025e-06,
+      "loss": 0.01,
+      "step": 10200
+    },
+    {
+      "epoch": 0.82,
+      "eval_loss": 0.007998097687959671,
+      "eval_pearson_cosine": 0.9672417410329249,
+      "eval_pearson_dot": 0.965013249775285,
+      "eval_pearson_euclidean": 0.9542839853098279,
+      "eval_pearson_manhattan": 0.9528819643748915,
+      "eval_pearson_max": 0.9672417410329249,
+      "eval_runtime": 425.8541,
+      "eval_samples_per_second": 1.174,
+      "eval_spearman_cosine": 0.9626618986475944,
+      "eval_spearman_dot": 0.9577223908895635,
+      "eval_spearman_euclidean": 0.962258313033252,
+      "eval_spearman_manhattan": 0.9619932559730238,
+      "eval_spearman_max": 0.9626618986475944,
+      "eval_steps_per_second": 1.174,
+      "step": 10200
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.3950428068637848,
+      "learning_rate": 3.9882069150361835e-06,
+      "loss": 0.0074,
+      "step": 10205
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.3376205563545227,
+      "learning_rate": 3.979272759760565e-06,
+      "loss": 0.0079,
+      "step": 10210
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.39471399784088135,
+      "learning_rate": 3.9703386044849465e-06,
+      "loss": 0.0087,
+      "step": 10215
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.42924797534942627,
+      "learning_rate": 3.9614044492093275e-06,
+      "loss": 0.0086,
+      "step": 10220
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.26988697052001953,
+      "learning_rate": 3.9524702939337085e-06,
+      "loss": 0.0076,
+      "step": 10225
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.5494524240493774,
+      "learning_rate": 3.94353613865809e-06,
+      "loss": 0.0096,
+      "step": 10230
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.39503470063209534,
+      "learning_rate": 3.9346019833824715e-06,
+      "loss": 0.0091,
+      "step": 10235
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.2849455177783966,
+      "learning_rate": 3.9256678281068525e-06,
+      "loss": 0.0066,
+      "step": 10240
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.2523050904273987,
+      "learning_rate": 3.916733672831234e-06,
+      "loss": 0.0076,
+      "step": 10245
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.4256332218647003,
+      "learning_rate": 3.9077995175556154e-06,
+      "loss": 0.0061,
+      "step": 10250
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.2837385833263397,
+      "learning_rate": 3.8988653622799965e-06,
+      "loss": 0.0069,
+      "step": 10255
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 0.44190511107444763,
+      "learning_rate": 3.889931207004378e-06,
+      "loss": 0.0059,
+      "step": 10260
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.3654380440711975,
+      "learning_rate": 3.880997051728759e-06,
+      "loss": 0.0071,
+      "step": 10265
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.356179416179657,
+      "learning_rate": 3.872062896453141e-06,
+      "loss": 0.005,
+      "step": 10270
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.4459650218486786,
+      "learning_rate": 3.863128741177522e-06,
+      "loss": 0.0074,
+      "step": 10275
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.6780635118484497,
+      "learning_rate": 3.854194585901903e-06,
+      "loss": 0.0101,
+      "step": 10280
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.42910513281822205,
+      "learning_rate": 3.845260430626284e-06,
+      "loss": 0.0087,
+      "step": 10285
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.38253623247146606,
+      "learning_rate": 3.8363262753506654e-06,
+      "loss": 0.0054,
+      "step": 10290
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.3616214990615845,
+      "learning_rate": 3.827392120075047e-06,
+      "loss": 0.0066,
+      "step": 10295
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.5730588436126709,
+      "learning_rate": 3.818457964799428e-06,
+      "loss": 0.0075,
+      "step": 10300
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.34027931094169617,
+      "learning_rate": 3.80952380952381e-06,
+      "loss": 0.0115,
+      "step": 10305
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.34853848814964294,
+      "learning_rate": 3.8005896542481913e-06,
+      "loss": 0.0098,
+      "step": 10310
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.3500560522079468,
+      "learning_rate": 3.7916554989725723e-06,
+      "loss": 0.0099,
+      "step": 10315
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.4545835256576538,
+      "learning_rate": 3.7827213436969538e-06,
+      "loss": 0.0075,
+      "step": 10320
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.3421791195869446,
+      "learning_rate": 3.7737871884213352e-06,
+      "loss": 0.0105,
+      "step": 10325
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.28388506174087524,
+      "learning_rate": 3.7648530331457163e-06,
+      "loss": 0.0135,
+      "step": 10330
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.6649767160415649,
+      "learning_rate": 3.7559188778700977e-06,
+      "loss": 0.0105,
+      "step": 10335
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.41207408905029297,
+      "learning_rate": 3.7469847225944788e-06,
+      "loss": 0.0065,
+      "step": 10340
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.3896176815032959,
+      "learning_rate": 3.7380505673188607e-06,
+      "loss": 0.0098,
+      "step": 10345
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.26551979780197144,
+      "learning_rate": 3.7291164120432417e-06,
+      "loss": 0.0088,
+      "step": 10350
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.22879204154014587,
+      "learning_rate": 3.7201822567676227e-06,
+      "loss": 0.0094,
+      "step": 10355
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.6052958369255066,
+      "learning_rate": 3.711248101492004e-06,
+      "loss": 0.0092,
+      "step": 10360
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.2694813311100006,
+      "learning_rate": 3.7023139462163852e-06,
+      "loss": 0.0054,
+      "step": 10365
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.3190039098262787,
+      "learning_rate": 3.693379790940767e-06,
+      "loss": 0.009,
+      "step": 10370
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.2353006899356842,
+      "learning_rate": 3.684445635665148e-06,
+      "loss": 0.0088,
+      "step": 10375
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.9957902431488037,
+      "learning_rate": 3.675511480389529e-06,
+      "loss": 0.0116,
+      "step": 10380
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 0.4154163897037506,
+      "learning_rate": 3.6665773251139107e-06,
+      "loss": 0.0055,
+      "step": 10385
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.3367329239845276,
+      "learning_rate": 3.657643169838292e-06,
+      "loss": 0.0061,
+      "step": 10390
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.3034825325012207,
+      "learning_rate": 3.6487090145626736e-06,
+      "loss": 0.0081,
+      "step": 10395
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 0.17845579981803894,
+      "learning_rate": 3.6397748592870546e-06,
+      "loss": 0.0043,
+      "step": 10400
     }
   ],
   "logging_steps": 5,
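The added log_history entries above follow two fixed schemas: a training record every 5 steps (epoch, grad_norm, learning_rate, loss, step) and a richer record every 300 steps carrying the eval_* metrics. A minimal sketch of pulling the eval curve out of such a file (key names as shown in the diff; the local path is hypothetical):

```python
# Minimal sketch: extract the evaluation curve from a trainer_state.json
# shaped like the diff above. The local path is hypothetical.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Eval records are the entries that carry an "eval_loss" key; plain
# training records only log loss/grad_norm/learning_rate.
evals = [e for e in state["log_history"] if "eval_loss" in e]
for e in evals:
    print(e["step"], e["eval_loss"], e["eval_spearman_cosine"])
# e.g. step 10200 -> eval_loss 0.007998..., eval_spearman_cosine 0.96266...
```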