RoyJoy committed (verified)
Commit 3029a19 · 1 Parent(s): 416f404

Training in progress, step 263, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cce123c0683ef9cd6975eb4e7a421ac1277037ddb1f56ad868f4857ff4175af
+oid sha256:f7ce00d367a44025a1b2da50a0ae92cee71ebcbcead329d510ab87a96278e546
 size 13587864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8682728647446457022a625059062c8515250852f037491dc6e4ba299841c1d0
+oid sha256:95f45bad474ddce38decca2703f28ddd5b9a0a09fe2b16720a3653880f6a0601
 size 27273018
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:050e050c2c08370bd1f9ba1c8620beabe7ff029584c29e4cd6c089022f033e90
+oid sha256:046b00f812867968a1818522ac11681f858ea22743cfa05206d826f8045b9556
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cf8a26ba280c3e7ef5570adf2f8b789f2d472d32e5416b32aa7525b9226e0be
+oid sha256:cd2ead64f33343a280abdcb643df0d01950c3c0b6535ac38a3a98d6ac73c83cd
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3fbc12a030d5fdd31b311d40ef25f1be4dcd3dcc88032d5c293f225b052d01f
+oid sha256:b1aabf29ec0654883753d299afbd096dd4035a6709b53bf61f2dfd484c605863
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39522c45970a3ae5ecbf73e1051ca4989117a8b949dcd4b7f4ee9663321048b0
+oid sha256:9cc7c048ba9c68e39fb9b9445846747d668b45aba649c5a4c4593f02246b45f0
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8f008ed445b652736016defc4807eff9b5ec48a500ab7e9db898ce35023867e
+oid sha256:31fa8d31bc0f0c1d5f52cc48b24cec7fdded44317d4f4282fb7b9258ac0bfb34
 size 1064
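
Each of the files above is tracked through Git LFS, so this commit only updates the three-line pointer (version, oid, size); the blobs themselves are fetched by their SHA-256. Below is a minimal sketch of checking a pulled blob against the oid recorded in its pointer, assuming the checkpoint has already been downloaded to the path shown; the helper and path are illustrative, not part of the repository.

# Illustrative check: hash a pulled LFS blob and compare it with the oid
# from the pointer file above. Path and expected digest refer to
# last-checkpoint/adapter_model.safetensors in this commit.
import hashlib

def sha256_of(path: str) -> str:
    """Stream the file through SHA-256 and return the hex digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "f7ce00d367a44025a1b2da50a0ae92cee71ebcbcead329d510ab87a96278e546"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("oid match" if actual == expected else "oid mismatch")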
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6708096265792847,
   "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 1.9065776930409915,
+  "epoch": 2.0057197330791228,
   "eval_steps": 25,
-  "global_step": 250,
+  "global_step": 263,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1845,6 +1845,97 @@
       "eval_samples_per_second": 178.136,
       "eval_steps_per_second": 46.315,
       "step": 250
+    },
+    {
+      "epoch": 1.9142040038131554,
+      "grad_norm": 0.4055742621421814,
+      "learning_rate": 3.146117115475456e-05,
+      "loss": 1.6014,
+      "step": 251
+    },
+    {
+      "epoch": 1.9218303145853195,
+      "grad_norm": 0.3974815607070923,
+      "learning_rate": 3.1228144217249694e-05,
+      "loss": 1.5712,
+      "step": 252
+    },
+    {
+      "epoch": 1.9294566253574832,
+      "grad_norm": 0.4176608920097351,
+      "learning_rate": 3.1015262829174156e-05,
+      "loss": 1.5899,
+      "step": 253
+    },
+    {
+      "epoch": 1.9370829361296473,
+      "grad_norm": 0.4495166540145874,
+      "learning_rate": 3.082255904968193e-05,
+      "loss": 1.636,
+      "step": 254
+    },
+    {
+      "epoch": 1.9447092469018112,
+      "grad_norm": 0.46961352229118347,
+      "learning_rate": 3.065006189925343e-05,
+      "loss": 1.6164,
+      "step": 255
+    },
+    {
+      "epoch": 1.9523355576739752,
+      "grad_norm": 0.536354124546051,
+      "learning_rate": 3.049779735532497e-05,
+      "loss": 1.7097,
+      "step": 256
+    },
+    {
+      "epoch": 1.9599618684461393,
+      "grad_norm": 0.41169190406799316,
+      "learning_rate": 3.036578834837682e-05,
+      "loss": 1.554,
+      "step": 257
+    },
+    {
+      "epoch": 1.967588179218303,
+      "grad_norm": 0.3979185223579407,
+      "learning_rate": 3.025405475847986e-05,
+      "loss": 1.5038,
+      "step": 258
+    },
+    {
+      "epoch": 1.9752144899904671,
+      "grad_norm": 0.3882528245449066,
+      "learning_rate": 3.0162613412301724e-05,
+      "loss": 1.5635,
+      "step": 259
+    },
+    {
+      "epoch": 1.982840800762631,
+      "grad_norm": 0.4156327247619629,
+      "learning_rate": 3.0091478080572808e-05,
+      "loss": 1.61,
+      "step": 260
+    },
+    {
+      "epoch": 1.990467111534795,
+      "grad_norm": 0.4123241603374481,
+      "learning_rate": 3.0040659476012428e-05,
+      "loss": 1.6104,
+      "step": 261
+    },
+    {
+      "epoch": 1.998093422306959,
+      "grad_norm": 0.45598259568214417,
+      "learning_rate": 3.0010165251715492e-05,
+      "loss": 1.6804,
+      "step": 262
+    },
+    {
+      "epoch": 2.0057197330791228,
+      "grad_norm": 1.43690025806427,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 3.2788,
+      "step": 263
     }
   ],
   "logging_steps": 1,
@@ -1868,12 +1959,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.11070068867072e+17,
+  "total_flos": 1.1684571244815974e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null