Training in progress, step 326, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step326/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step326/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +438 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 763470136
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa8b2208e339d0966b30c53e09a99df61e2311b931ce1a0d629c8ac892f616a5
|
3 |
size 763470136
|
last-checkpoint/global_step326/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43b3137f947c6139cf829b67978cdba814ebd02eaee9fdcc3c0f167fd35fd35b
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6db33c332d8581358d602504c1c91f0d3a883b074cf661ac66a15b03dd40abd
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc2873da28d4036072b8fb47729fcb07fd661ca126c821a1edd1b5e0e0e30097
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2310adddf2072beb2ec0a8d7336c7313d0522f5d558ddf1404c757f814fa692
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c0a3de1b7fabd73a2f8e028bc47bd34d2cadec6c6a7e0f55c1ec00eaa8d5f09
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e65446abe8198c8cdae4ef4d6043c5b7cc00aaa255a4eb5fde655e3a2c814d26
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99b1a452b0ea7e88f778e0e3e688314048124e3e186d2dbefa61af7e7fcb6d38
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feac3121144190c7b1081674f7d997011edfb3697de551be384f00cdc0ba3d16
|
3 |
+
size 289065424
|
last-checkpoint/global_step326/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99cf2bb29fbbb9b7a3cdf4a3f999edea48aca7423658b0613e3cad205c425e76
|
3 |
+
size 348711830
|
last-checkpoint/global_step326/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ae71c4246c7a236ee1d9aafa93f4d8184b878bddd8411262e99f470ba6a22d6
|
3 |
+
size 348711830
|
last-checkpoint/global_step326/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f353e9171125c04ba586a5e1399b5e436346795c2d781358b392ae0949ba32f3
|
3 |
+
size 348711830
|
last-checkpoint/global_step326/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b3bac07cff339e40e4d293b58834fa924ecd46e0a7004f2a7b23e4ae2dccd50
|
3 |
+
size 348711830
|
last-checkpoint/global_step326/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdcdd82841eb104d9e8a4621cac38919d3eb554fb1fff0a673fde94a7dd6e2ed
|
3 |
+
size 348711830
|
last-checkpoint/global_step326/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:baf08c5cca6ee378c601258dcdcdc6750f8263163766efc8715e728d7374f16b
|
3 |
+
size 348711830
|
last-checkpoint/global_step326/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04351dfca49e5d3ecaa0f61d4f3ff75c1018f5ac923af4f043a17b42b16aa183
|
3 |
+
size 348711830
|
last-checkpoint/global_step326/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7323dd294955fbd4f4d06dbac88975d8a0dfb8cf0c1a2c0e35064347930eed11
|
3 |
+
size 348711830
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step326
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7773f084902535989bdb41582efe57404415ae441c0e941b91e35ed5bef8d6c
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb6f1e872eaa090ac7fcbb7390762ebd32f4720fffac3f24df60938a27e68cd4
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:168f0069d86758b09cb8707be4dc71abfea652954fd7c1fc7710c08989d444bb
|
3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6f3f1877afae4463c0da7af29b5016c2a4b26f8ab03a4bb94b21beefb8705ac
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bc6bc1a489faad48156164ba681062284f4ce06e78099aed3eb21be38bdcae8
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3874e1aa39cf2ac616290be1045cac257b998568136e9a70f9a79d503a77c1be
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08d35009470b1536e71f50cca3e4f2587ed7caac64c4ff1c8286f89f2bdbd9d9
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6aec33103c51dff2dd3527e0d1edfb46d84c375b17676323ddceb55412f0047
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c1397d76155071779653df2de895577183fdb8d7655b1d6346b073c3c09830d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 66,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1895,6 +1895,440 @@
|
|
1895 |
"eval_samples_per_second": 1.794,
|
1896 |
"eval_steps_per_second": 0.126,
|
1897 |
"step": 264
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1898 |
}
|
1899 |
],
|
1900 |
"logging_steps": 1,
|
@@ -1909,12 +2343,12 @@
|
|
1909 |
"should_evaluate": false,
|
1910 |
"should_log": false,
|
1911 |
"should_save": true,
|
1912 |
-
"should_training_stop":
|
1913 |
},
|
1914 |
"attributes": {}
|
1915 |
}
|
1916 |
},
|
1917 |
-
"total_flos":
|
1918 |
"train_batch_size": 2,
|
1919 |
"trial_name": null,
|
1920 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 66,
|
6 |
+
"global_step": 326,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1895 |
"eval_samples_per_second": 1.794,
|
1896 |
"eval_steps_per_second": 0.126,
|
1897 |
"step": 264
|
1898 |
+
},
|
1899 |
+
{
|
1900 |
+
"epoch": 0.8128834355828221,
|
1901 |
+
"grad_norm": 0.2830531876648041,
|
1902 |
+
"learning_rate": 1.8540000807185192e-05,
|
1903 |
+
"loss": 1.9384,
|
1904 |
+
"step": 265
|
1905 |
+
},
|
1906 |
+
{
|
1907 |
+
"epoch": 0.8159509202453987,
|
1908 |
+
"grad_norm": 0.2670241830579454,
|
1909 |
+
"learning_rate": 1.827113894397003e-05,
|
1910 |
+
"loss": 1.8443,
|
1911 |
+
"step": 266
|
1912 |
+
},
|
1913 |
+
{
|
1914 |
+
"epoch": 0.8190184049079755,
|
1915 |
+
"grad_norm": 0.5199599677205632,
|
1916 |
+
"learning_rate": 1.800614841383898e-05,
|
1917 |
+
"loss": 1.9262,
|
1918 |
+
"step": 267
|
1919 |
+
},
|
1920 |
+
{
|
1921 |
+
"epoch": 0.8220858895705522,
|
1922 |
+
"grad_norm": 0.2979059774589199,
|
1923 |
+
"learning_rate": 1.7745057147595694e-05,
|
1924 |
+
"loss": 1.8408,
|
1925 |
+
"step": 268
|
1926 |
+
},
|
1927 |
+
{
|
1928 |
+
"epoch": 0.8251533742331288,
|
1929 |
+
"grad_norm": 0.3369017601149041,
|
1930 |
+
"learning_rate": 1.7487892665049627e-05,
|
1931 |
+
"loss": 1.9671,
|
1932 |
+
"step": 269
|
1933 |
+
},
|
1934 |
+
{
|
1935 |
+
"epoch": 0.8282208588957055,
|
1936 |
+
"grad_norm": 0.24208825522114308,
|
1937 |
+
"learning_rate": 1.7234682072115305e-05,
|
1938 |
+
"loss": 1.9101,
|
1939 |
+
"step": 270
|
1940 |
+
},
|
1941 |
+
{
|
1942 |
+
"epoch": 0.8312883435582822,
|
1943 |
+
"grad_norm": 0.3809834134932596,
|
1944 |
+
"learning_rate": 1.698545205795536e-05,
|
1945 |
+
"loss": 1.8445,
|
1946 |
+
"step": 271
|
1947 |
+
},
|
1948 |
+
{
|
1949 |
+
"epoch": 0.8343558282208589,
|
1950 |
+
"grad_norm": 0.27384739149228576,
|
1951 |
+
"learning_rate": 1.674022889216737e-05,
|
1952 |
+
"loss": 1.9337,
|
1953 |
+
"step": 272
|
1954 |
+
},
|
1955 |
+
{
|
1956 |
+
"epoch": 0.8374233128834356,
|
1957 |
+
"grad_norm": 0.25542052798806203,
|
1958 |
+
"learning_rate": 1.6499038422014962e-05,
|
1959 |
+
"loss": 1.8697,
|
1960 |
+
"step": 273
|
1961 |
+
},
|
1962 |
+
{
|
1963 |
+
"epoch": 0.8404907975460123,
|
1964 |
+
"grad_norm": 0.30649006891608727,
|
1965 |
+
"learning_rate": 1.626190606970346e-05,
|
1966 |
+
"loss": 1.8985,
|
1967 |
+
"step": 274
|
1968 |
+
},
|
1969 |
+
{
|
1970 |
+
"epoch": 0.843558282208589,
|
1971 |
+
"grad_norm": 0.27648461915446576,
|
1972 |
+
"learning_rate": 1.602885682970026e-05,
|
1973 |
+
"loss": 1.8851,
|
1974 |
+
"step": 275
|
1975 |
+
},
|
1976 |
+
{
|
1977 |
+
"epoch": 0.8466257668711656,
|
1978 |
+
"grad_norm": 1.1533982638871452,
|
1979 |
+
"learning_rate": 1.57999152661004e-05,
|
1980 |
+
"loss": 1.9318,
|
1981 |
+
"step": 276
|
1982 |
+
},
|
1983 |
+
{
|
1984 |
+
"epoch": 0.8496932515337423,
|
1985 |
+
"grad_norm": 0.33969524913455146,
|
1986 |
+
"learning_rate": 1.5575105510037396e-05,
|
1987 |
+
"loss": 2.0149,
|
1988 |
+
"step": 277
|
1989 |
+
},
|
1990 |
+
{
|
1991 |
+
"epoch": 0.852760736196319,
|
1992 |
+
"grad_norm": 0.5956725111127443,
|
1993 |
+
"learning_rate": 1.53544512571397e-05,
|
1994 |
+
"loss": 1.8834,
|
1995 |
+
"step": 278
|
1996 |
+
},
|
1997 |
+
{
|
1998 |
+
"epoch": 0.8558282208588958,
|
1999 |
+
"grad_norm": 0.5892298656241596,
|
2000 |
+
"learning_rate": 1.5137975765033205e-05,
|
2001 |
+
"loss": 1.8972,
|
2002 |
+
"step": 279
|
2003 |
+
},
|
2004 |
+
{
|
2005 |
+
"epoch": 0.8588957055214724,
|
2006 |
+
"grad_norm": 0.41593605055209165,
|
2007 |
+
"learning_rate": 1.4925701850889772e-05,
|
2008 |
+
"loss": 1.9427,
|
2009 |
+
"step": 280
|
2010 |
+
},
|
2011 |
+
{
|
2012 |
+
"epoch": 0.8619631901840491,
|
2013 |
+
"grad_norm": 0.2630748817948859,
|
2014 |
+
"learning_rate": 1.4717651889022202e-05,
|
2015 |
+
"loss": 1.9469,
|
2016 |
+
"step": 281
|
2017 |
+
},
|
2018 |
+
{
|
2019 |
+
"epoch": 0.8650306748466258,
|
2020 |
+
"grad_norm": 0.2232832403928089,
|
2021 |
+
"learning_rate": 1.4513847808525969e-05,
|
2022 |
+
"loss": 1.9662,
|
2023 |
+
"step": 282
|
2024 |
+
},
|
2025 |
+
{
|
2026 |
+
"epoch": 0.8680981595092024,
|
2027 |
+
"grad_norm": 0.31719749827250515,
|
2028 |
+
"learning_rate": 1.4314311090967786e-05,
|
2029 |
+
"loss": 1.9091,
|
2030 |
+
"step": 283
|
2031 |
+
},
|
2032 |
+
{
|
2033 |
+
"epoch": 0.8711656441717791,
|
2034 |
+
"grad_norm": 0.301123405840287,
|
2035 |
+
"learning_rate": 1.4119062768121433e-05,
|
2036 |
+
"loss": 1.8862,
|
2037 |
+
"step": 284
|
2038 |
+
},
|
2039 |
+
{
|
2040 |
+
"epoch": 0.8742331288343558,
|
2041 |
+
"grad_norm": 0.6726088360165043,
|
2042 |
+
"learning_rate": 1.3928123419750888e-05,
|
2043 |
+
"loss": 1.8739,
|
2044 |
+
"step": 285
|
2045 |
+
},
|
2046 |
+
{
|
2047 |
+
"epoch": 0.8773006134969326,
|
2048 |
+
"grad_norm": 0.4202167476604764,
|
2049 |
+
"learning_rate": 1.3741513171441176e-05,
|
2050 |
+
"loss": 1.9232,
|
2051 |
+
"step": 286
|
2052 |
+
},
|
2053 |
+
{
|
2054 |
+
"epoch": 0.8803680981595092,
|
2055 |
+
"grad_norm": 0.304988395998919,
|
2056 |
+
"learning_rate": 1.3559251692477087e-05,
|
2057 |
+
"loss": 1.9318,
|
2058 |
+
"step": 287
|
2059 |
+
},
|
2060 |
+
{
|
2061 |
+
"epoch": 0.8834355828220859,
|
2062 |
+
"grad_norm": 0.274507041819108,
|
2063 |
+
"learning_rate": 1.3381358193769976e-05,
|
2064 |
+
"loss": 1.8499,
|
2065 |
+
"step": 288
|
2066 |
+
},
|
2067 |
+
{
|
2068 |
+
"epoch": 0.8865030674846626,
|
2069 |
+
"grad_norm": 0.47861538421593386,
|
2070 |
+
"learning_rate": 1.320785142583284e-05,
|
2071 |
+
"loss": 1.9518,
|
2072 |
+
"step": 289
|
2073 |
+
},
|
2074 |
+
{
|
2075 |
+
"epoch": 0.8895705521472392,
|
2076 |
+
"grad_norm": 0.45942646770952145,
|
2077 |
+
"learning_rate": 1.3038749676803994e-05,
|
2078 |
+
"loss": 1.9109,
|
2079 |
+
"step": 290
|
2080 |
+
},
|
2081 |
+
{
|
2082 |
+
"epoch": 0.8926380368098159,
|
2083 |
+
"grad_norm": 0.27087716251353355,
|
2084 |
+
"learning_rate": 1.2874070770519428e-05,
|
2085 |
+
"loss": 1.8813,
|
2086 |
+
"step": 291
|
2087 |
+
},
|
2088 |
+
{
|
2089 |
+
"epoch": 0.8957055214723927,
|
2090 |
+
"grad_norm": 0.255203728473793,
|
2091 |
+
"learning_rate": 1.2713832064634126e-05,
|
2092 |
+
"loss": 1.873,
|
2093 |
+
"step": 292
|
2094 |
+
},
|
2095 |
+
{
|
2096 |
+
"epoch": 0.8987730061349694,
|
2097 |
+
"grad_norm": 0.40071001023936836,
|
2098 |
+
"learning_rate": 1.2558050448792515e-05,
|
2099 |
+
"loss": 1.9324,
|
2100 |
+
"step": 293
|
2101 |
+
},
|
2102 |
+
{
|
2103 |
+
"epoch": 0.901840490797546,
|
2104 |
+
"grad_norm": 0.33237213114045755,
|
2105 |
+
"learning_rate": 1.2406742342848248e-05,
|
2106 |
+
"loss": 1.96,
|
2107 |
+
"step": 294
|
2108 |
+
},
|
2109 |
+
{
|
2110 |
+
"epoch": 0.9049079754601227,
|
2111 |
+
"grad_norm": 0.2921583930232282,
|
2112 |
+
"learning_rate": 1.2259923695133503e-05,
|
2113 |
+
"loss": 1.8696,
|
2114 |
+
"step": 295
|
2115 |
+
},
|
2116 |
+
{
|
2117 |
+
"epoch": 0.9079754601226994,
|
2118 |
+
"grad_norm": 0.2753105203678559,
|
2119 |
+
"learning_rate": 1.2117609980777959e-05,
|
2120 |
+
"loss": 1.9038,
|
2121 |
+
"step": 296
|
2122 |
+
},
|
2123 |
+
{
|
2124 |
+
"epoch": 0.911042944785276,
|
2125 |
+
"grad_norm": 0.497963211949326,
|
2126 |
+
"learning_rate": 1.1979816200077707e-05,
|
2127 |
+
"loss": 1.9388,
|
2128 |
+
"step": 297
|
2129 |
+
},
|
2130 |
+
{
|
2131 |
+
"epoch": 0.9141104294478528,
|
2132 |
+
"grad_norm": 0.2474786285871462,
|
2133 |
+
"learning_rate": 1.1846556876914151e-05,
|
2134 |
+
"loss": 1.9544,
|
2135 |
+
"step": 298
|
2136 |
+
},
|
2137 |
+
{
|
2138 |
+
"epoch": 0.9171779141104295,
|
2139 |
+
"grad_norm": 0.26791445026050176,
|
2140 |
+
"learning_rate": 1.1717846057223144e-05,
|
2141 |
+
"loss": 1.9231,
|
2142 |
+
"step": 299
|
2143 |
+
},
|
2144 |
+
{
|
2145 |
+
"epoch": 0.9202453987730062,
|
2146 |
+
"grad_norm": 0.3923236183364779,
|
2147 |
+
"learning_rate": 1.159369730751452e-05,
|
2148 |
+
"loss": 1.8686,
|
2149 |
+
"step": 300
|
2150 |
+
},
|
2151 |
+
{
|
2152 |
+
"epoch": 0.9233128834355828,
|
2153 |
+
"grad_norm": 0.36556731516768504,
|
2154 |
+
"learning_rate": 1.1474123713442137e-05,
|
2155 |
+
"loss": 1.9278,
|
2156 |
+
"step": 301
|
2157 |
+
},
|
2158 |
+
{
|
2159 |
+
"epoch": 0.9263803680981595,
|
2160 |
+
"grad_norm": 0.24192425833135245,
|
2161 |
+
"learning_rate": 1.1359137878424578e-05,
|
2162 |
+
"loss": 1.8853,
|
2163 |
+
"step": 302
|
2164 |
+
},
|
2165 |
+
{
|
2166 |
+
"epoch": 0.9294478527607362,
|
2167 |
+
"grad_norm": 0.31690600810620534,
|
2168 |
+
"learning_rate": 1.1248751922316776e-05,
|
2169 |
+
"loss": 1.9523,
|
2170 |
+
"step": 303
|
2171 |
+
},
|
2172 |
+
{
|
2173 |
+
"epoch": 0.9325153374233128,
|
2174 |
+
"grad_norm": 0.27955140199036155,
|
2175 |
+
"learning_rate": 1.1142977480132493e-05,
|
2176 |
+
"loss": 1.8225,
|
2177 |
+
"step": 304
|
2178 |
+
},
|
2179 |
+
{
|
2180 |
+
"epoch": 0.9355828220858896,
|
2181 |
+
"grad_norm": 0.2831264739725871,
|
2182 |
+
"learning_rate": 1.104182570081797e-05,
|
2183 |
+
"loss": 1.9258,
|
2184 |
+
"step": 305
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 0.9386503067484663,
|
2188 |
+
"grad_norm": 0.26580496177825247,
|
2189 |
+
"learning_rate": 1.0945307246076797e-05,
|
2190 |
+
"loss": 1.9327,
|
2191 |
+
"step": 306
|
2192 |
+
},
|
2193 |
+
{
|
2194 |
+
"epoch": 0.941717791411043,
|
2195 |
+
"grad_norm": 0.30887069355917346,
|
2196 |
+
"learning_rate": 1.0853432289246138e-05,
|
2197 |
+
"loss": 1.9412,
|
2198 |
+
"step": 307
|
2199 |
+
},
|
2200 |
+
{
|
2201 |
+
"epoch": 0.9447852760736196,
|
2202 |
+
"grad_norm": 0.44810137462917216,
|
2203 |
+
"learning_rate": 1.076621051422442e-05,
|
2204 |
+
"loss": 1.9057,
|
2205 |
+
"step": 308
|
2206 |
+
},
|
2207 |
+
{
|
2208 |
+
"epoch": 0.9478527607361963,
|
2209 |
+
"grad_norm": 0.27583855429775517,
|
2210 |
+
"learning_rate": 1.0683651114450641e-05,
|
2211 |
+
"loss": 1.9357,
|
2212 |
+
"step": 309
|
2213 |
+
},
|
2214 |
+
{
|
2215 |
+
"epoch": 0.950920245398773,
|
2216 |
+
"grad_norm": 0.26050390516719396,
|
2217 |
+
"learning_rate": 1.0605762791935325e-05,
|
2218 |
+
"loss": 1.8674,
|
2219 |
+
"step": 310
|
2220 |
+
},
|
2221 |
+
{
|
2222 |
+
"epoch": 0.9539877300613497,
|
2223 |
+
"grad_norm": 0.26034125726942287,
|
2224 |
+
"learning_rate": 1.0532553756343328e-05,
|
2225 |
+
"loss": 1.8837,
|
2226 |
+
"step": 311
|
2227 |
+
},
|
2228 |
+
{
|
2229 |
+
"epoch": 0.9570552147239264,
|
2230 |
+
"grad_norm": 0.380331760419281,
|
2231 |
+
"learning_rate": 1.0464031724128512e-05,
|
2232 |
+
"loss": 1.9202,
|
2233 |
+
"step": 312
|
2234 |
+
},
|
2235 |
+
{
|
2236 |
+
"epoch": 0.9601226993865031,
|
2237 |
+
"grad_norm": 0.3024899052220286,
|
2238 |
+
"learning_rate": 1.0400203917720394e-05,
|
2239 |
+
"loss": 1.833,
|
2240 |
+
"step": 313
|
2241 |
+
},
|
2242 |
+
{
|
2243 |
+
"epoch": 0.9631901840490797,
|
2244 |
+
"grad_norm": 0.26156906536760005,
|
2245 |
+
"learning_rate": 1.0341077064762893e-05,
|
2246 |
+
"loss": 1.8538,
|
2247 |
+
"step": 314
|
2248 |
+
},
|
2249 |
+
{
|
2250 |
+
"epoch": 0.9662576687116564,
|
2251 |
+
"grad_norm": 0.5419644400783428,
|
2252 |
+
"learning_rate": 1.0286657397405204e-05,
|
2253 |
+
"loss": 1.8956,
|
2254 |
+
"step": 315
|
2255 |
+
},
|
2256 |
+
{
|
2257 |
+
"epoch": 0.9693251533742331,
|
2258 |
+
"grad_norm": 0.2754473793756419,
|
2259 |
+
"learning_rate": 1.0236950651644922e-05,
|
2260 |
+
"loss": 1.8821,
|
2261 |
+
"step": 316
|
2262 |
+
},
|
2263 |
+
{
|
2264 |
+
"epoch": 0.9723926380368099,
|
2265 |
+
"grad_norm": 0.32743295245170423,
|
2266 |
+
"learning_rate": 1.019196206672345e-05,
|
2267 |
+
"loss": 1.8669,
|
2268 |
+
"step": 317
|
2269 |
+
},
|
2270 |
+
{
|
2271 |
+
"epoch": 0.9754601226993865,
|
2272 |
+
"grad_norm": 0.2983793501294546,
|
2273 |
+
"learning_rate": 1.0151696384573753e-05,
|
2274 |
+
"loss": 1.8806,
|
2275 |
+
"step": 318
|
2276 |
+
},
|
2277 |
+
{
|
2278 |
+
"epoch": 0.9785276073619632,
|
2279 |
+
"grad_norm": 0.274678179585171,
|
2280 |
+
"learning_rate": 1.011615784932056e-05,
|
2281 |
+
"loss": 1.9428,
|
2282 |
+
"step": 319
|
2283 |
+
},
|
2284 |
+
{
|
2285 |
+
"epoch": 0.9815950920245399,
|
2286 |
+
"grad_norm": 0.802831711997894,
|
2287 |
+
"learning_rate": 1.0085350206833016e-05,
|
2288 |
+
"loss": 1.8988,
|
2289 |
+
"step": 320
|
2290 |
+
},
|
2291 |
+
{
|
2292 |
+
"epoch": 0.9846625766871165,
|
2293 |
+
"grad_norm": 0.36523952422202455,
|
2294 |
+
"learning_rate": 1.0059276704329856e-05,
|
2295 |
+
"loss": 1.8695,
|
2296 |
+
"step": 321
|
2297 |
+
},
|
2298 |
+
{
|
2299 |
+
"epoch": 0.9877300613496932,
|
2300 |
+
"grad_norm": 0.2857793976397457,
|
2301 |
+
"learning_rate": 1.003794009003713e-05,
|
2302 |
+
"loss": 1.8923,
|
2303 |
+
"step": 322
|
2304 |
+
},
|
2305 |
+
{
|
2306 |
+
"epoch": 0.99079754601227,
|
2307 |
+
"grad_norm": 0.306887686398712,
|
2308 |
+
"learning_rate": 1.0021342612898534e-05,
|
2309 |
+
"loss": 1.9541,
|
2310 |
+
"step": 323
|
2311 |
+
},
|
2312 |
+
{
|
2313 |
+
"epoch": 0.9938650306748467,
|
2314 |
+
"grad_norm": 0.5124292513803443,
|
2315 |
+
"learning_rate": 1.0009486022338391e-05,
|
2316 |
+
"loss": 1.9622,
|
2317 |
+
"step": 324
|
2318 |
+
},
|
2319 |
+
{
|
2320 |
+
"epoch": 0.9969325153374233,
|
2321 |
+
"grad_norm": 0.27281561169770374,
|
2322 |
+
"learning_rate": 1.0002371568077212e-05,
|
2323 |
+
"loss": 1.9336,
|
2324 |
+
"step": 325
|
2325 |
+
},
|
2326 |
+
{
|
2327 |
+
"epoch": 1.0,
|
2328 |
+
"grad_norm": 0.28851290398135704,
|
2329 |
+
"learning_rate": 1e-05,
|
2330 |
+
"loss": 1.8766,
|
2331 |
+
"step": 326
|
2332 |
}
|
2333 |
],
|
2334 |
"logging_steps": 1,
|
|
|
2343 |
"should_evaluate": false,
|
2344 |
"should_log": false,
|
2345 |
"should_save": true,
|
2346 |
+
"should_training_stop": true
|
2347 |
},
|
2348 |
"attributes": {}
|
2349 |
}
|
2350 |
},
|
2351 |
+
"total_flos": 355990511812608.0,
|
2352 |
"train_batch_size": 2,
|
2353 |
"trial_name": null,
|
2354 |
"trial_params": null
|