Training in progress, step 263, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +95 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13587864
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7ce00d367a44025a1b2da50a0ae92cee71ebcbcead329d510ab87a96278e546
|
3 |
size 13587864
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27273018
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95f45bad474ddce38decca2703f28ddd5b9a0a09fe2b16720a3653880f6a0601
|
3 |
size 27273018
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:046b00f812867968a1818522ac11681f858ea22743cfa05206d826f8045b9556
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd2ead64f33343a280abdcb643df0d01950c3c0b6535ac38a3a98d6ac73c83cd
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1aabf29ec0654883753d299afbd096dd4035a6709b53bf61f2dfd484c605863
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cc7c048ba9c68e39fb9b9445846747d668b45aba649c5a4c4593f02246b45f0
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31fa8d31bc0f0c1d5f52cc48b24cec7fdded44317d4f4282fb7b9258ac0bfb34
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.6708096265792847,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-250",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 25,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1845,6 +1845,97 @@
|
|
1845 |
"eval_samples_per_second": 178.136,
|
1846 |
"eval_steps_per_second": 46.315,
|
1847 |
"step": 250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1848 |
}
|
1849 |
],
|
1850 |
"logging_steps": 1,
|
@@ -1868,12 +1959,12 @@
|
|
1868 |
"should_evaluate": false,
|
1869 |
"should_log": false,
|
1870 |
"should_save": true,
|
1871 |
-
"should_training_stop":
|
1872 |
},
|
1873 |
"attributes": {}
|
1874 |
}
|
1875 |
},
|
1876 |
-
"total_flos": 1.
|
1877 |
"train_batch_size": 1,
|
1878 |
"trial_name": null,
|
1879 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 1.6708096265792847,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-250",
|
4 |
+
"epoch": 2.0057197330791228,
|
5 |
"eval_steps": 25,
|
6 |
+
"global_step": 263,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1845 |
"eval_samples_per_second": 178.136,
|
1846 |
"eval_steps_per_second": 46.315,
|
1847 |
"step": 250
|
1848 |
+
},
|
1849 |
+
{
|
1850 |
+
"epoch": 1.9142040038131554,
|
1851 |
+
"grad_norm": 0.4055742621421814,
|
1852 |
+
"learning_rate": 3.146117115475456e-05,
|
1853 |
+
"loss": 1.6014,
|
1854 |
+
"step": 251
|
1855 |
+
},
|
1856 |
+
{
|
1857 |
+
"epoch": 1.9218303145853195,
|
1858 |
+
"grad_norm": 0.3974815607070923,
|
1859 |
+
"learning_rate": 3.1228144217249694e-05,
|
1860 |
+
"loss": 1.5712,
|
1861 |
+
"step": 252
|
1862 |
+
},
|
1863 |
+
{
|
1864 |
+
"epoch": 1.9294566253574832,
|
1865 |
+
"grad_norm": 0.4176608920097351,
|
1866 |
+
"learning_rate": 3.1015262829174156e-05,
|
1867 |
+
"loss": 1.5899,
|
1868 |
+
"step": 253
|
1869 |
+
},
|
1870 |
+
{
|
1871 |
+
"epoch": 1.9370829361296473,
|
1872 |
+
"grad_norm": 0.4495166540145874,
|
1873 |
+
"learning_rate": 3.082255904968193e-05,
|
1874 |
+
"loss": 1.636,
|
1875 |
+
"step": 254
|
1876 |
+
},
|
1877 |
+
{
|
1878 |
+
"epoch": 1.9447092469018112,
|
1879 |
+
"grad_norm": 0.46961352229118347,
|
1880 |
+
"learning_rate": 3.065006189925343e-05,
|
1881 |
+
"loss": 1.6164,
|
1882 |
+
"step": 255
|
1883 |
+
},
|
1884 |
+
{
|
1885 |
+
"epoch": 1.9523355576739752,
|
1886 |
+
"grad_norm": 0.536354124546051,
|
1887 |
+
"learning_rate": 3.049779735532497e-05,
|
1888 |
+
"loss": 1.7097,
|
1889 |
+
"step": 256
|
1890 |
+
},
|
1891 |
+
{
|
1892 |
+
"epoch": 1.9599618684461393,
|
1893 |
+
"grad_norm": 0.41169190406799316,
|
1894 |
+
"learning_rate": 3.036578834837682e-05,
|
1895 |
+
"loss": 1.554,
|
1896 |
+
"step": 257
|
1897 |
+
},
|
1898 |
+
{
|
1899 |
+
"epoch": 1.967588179218303,
|
1900 |
+
"grad_norm": 0.3979185223579407,
|
1901 |
+
"learning_rate": 3.025405475847986e-05,
|
1902 |
+
"loss": 1.5038,
|
1903 |
+
"step": 258
|
1904 |
+
},
|
1905 |
+
{
|
1906 |
+
"epoch": 1.9752144899904671,
|
1907 |
+
"grad_norm": 0.3882528245449066,
|
1908 |
+
"learning_rate": 3.0162613412301724e-05,
|
1909 |
+
"loss": 1.5635,
|
1910 |
+
"step": 259
|
1911 |
+
},
|
1912 |
+
{
|
1913 |
+
"epoch": 1.982840800762631,
|
1914 |
+
"grad_norm": 0.4156327247619629,
|
1915 |
+
"learning_rate": 3.0091478080572808e-05,
|
1916 |
+
"loss": 1.61,
|
1917 |
+
"step": 260
|
1918 |
+
},
|
1919 |
+
{
|
1920 |
+
"epoch": 1.990467111534795,
|
1921 |
+
"grad_norm": 0.4123241603374481,
|
1922 |
+
"learning_rate": 3.0040659476012428e-05,
|
1923 |
+
"loss": 1.6104,
|
1924 |
+
"step": 261
|
1925 |
+
},
|
1926 |
+
{
|
1927 |
+
"epoch": 1.998093422306959,
|
1928 |
+
"grad_norm": 0.45598259568214417,
|
1929 |
+
"learning_rate": 3.0010165251715492e-05,
|
1930 |
+
"loss": 1.6804,
|
1931 |
+
"step": 262
|
1932 |
+
},
|
1933 |
+
{
|
1934 |
+
"epoch": 2.0057197330791228,
|
1935 |
+
"grad_norm": 1.43690025806427,
|
1936 |
+
"learning_rate": 2.9999999999999997e-05,
|
1937 |
+
"loss": 3.2788,
|
1938 |
+
"step": 263
|
1939 |
}
|
1940 |
],
|
1941 |
"logging_steps": 1,
|
|
|
1959 |
"should_evaluate": false,
|
1960 |
"should_log": false,
|
1961 |
"should_save": true,
|
1962 |
+
"should_training_stop": true
|
1963 |
},
|
1964 |
"attributes": {}
|
1965 |
}
|
1966 |
},
|
1967 |
+
"total_flos": 1.1684571244815974e+17,
|
1968 |
"train_batch_size": 1,
|
1969 |
"trial_name": null,
|
1970 |
"trial_params": null
|