g4rg committed on
Commit 9805734
1 parent: 2b61c29

Training in progress, step 198, checkpoint

Files changed (28)
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step198/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step198/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step198/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step198/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step198/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step198/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step198/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step198/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step198/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step198/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step198/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step198/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step198/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step198/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step198/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step198/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +473 -3
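
The new global_step198 directory is a DeepSpeed ZeRO checkpoint: one bf16 optimizer-state shard and one model-state shard per data-parallel rank (8 ranks here), committed alongside the PEFT adapter weights in adapter_model.safetensors. If a single consolidated fp32 state dict is needed rather than the shards, DeepSpeed's zero_to_fp32 helper can merge them; the sketch below is not part of this commit and assumes DeepSpeed is installed and that the layout matches the file list above.

# Minimal sketch (assumption, not part of this commit): consolidate the sharded
# ZeRO states under last-checkpoint/global_step198 into one fp32 state dict.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint",     # directory holding global_step198/ and the `latest` tag
    tag="global_step198",  # optional; defaults to whatever `latest` points at
)
print(f"consolidated {len(state_dict)} tensors")

DeepSpeed usually also drops a standalone zero_to_fp32.py script into its checkpoint directories that performs the same conversion from the command line.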
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:33b1027b21df86a9ac1c25a185657bca1afb488a02b9101ce864a4e74d409fce
+ oid sha256:92aa718963ff1a158ab63158709261c7329af2ae34ebe9805357cdb7a33e38de
  size 763470136
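
Each entry in this diff is a Git LFS pointer, so only the three-line pointer (spec version, sha256 oid, byte size) is versioned here; the ~763 MB adapter itself lives in LFS storage, and an update just swaps the oid. A quick way to check that a downloaded copy matches the new pointer (standard-library sketch; the local path is an assumption):

# Sketch: verify a locally downloaded copy of the adapter against the new
# pointer above. Standard library only; the local path is an assumption.
import hashlib

def sha256_of(path, chunk=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

expected = "92aa718963ff1a158ab63158709261c7329af2ae34ebe9805357cdb7a33e38de"
assert sha256_of("last-checkpoint/adapter_model.safetensors") == expected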
last-checkpoint/global_step198/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:184ca24d308598fcfdc1eff063b3844a9fe07c83450ba68204baea754acd2b6c
+ size 289064656
last-checkpoint/global_step198/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f90faf99e52bcaf7883e7d0d984167b797b6a3ef426ddc5a01dcd116fa6216b6
+ size 289064656
last-checkpoint/global_step198/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6222a03d2881b2d32dff03e27d53cab95de99b56844cb51944277c03b2573f1d
+ size 289064656
last-checkpoint/global_step198/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:822222cd909aa8f2f0089f7216c587624099a193d544e662c584574b496ef1bb
+ size 289064656
last-checkpoint/global_step198/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38e9c407ef9c94d80e6849be71df10c0f78dd80597b665cc36480ec6de1d2f3a
+ size 289064656
last-checkpoint/global_step198/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:36b716a34fca1e67d6f52f91eb4a50179e59d9d516991954ab69fb4a5eea15a5
+ size 289064656
last-checkpoint/global_step198/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:46e90af09b5072fd5c2aa766837b4c41d6c20f4942eebb5720e785ffe39e336a
+ size 289064656
last-checkpoint/global_step198/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c59850584cb3af418bce4f87188bafc18552879efd66684c6a43ab0be85edf6
+ size 289064656
last-checkpoint/global_step198/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05e5052d8fdfb265a4a9e86a812d17a33a0e2e85bfdb05e52437c5634475bb2a
+ size 348711830
last-checkpoint/global_step198/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:af29e15925b58b547d3e9448345ec03f39e0987c8eb8855f21e9cedfea0b0546
+ size 348711830
last-checkpoint/global_step198/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a70597656b52052cfcb1b6823598ef703475dd92dac4d0f1b654c0315dff6c14
+ size 348711830
last-checkpoint/global_step198/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b18c2b5468e85c7def27a12217ccf2c251cd8cd7d5abafe5559592bed2944108
+ size 348711830
last-checkpoint/global_step198/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13ea2b7e64c74d422fb18d911fe150c9644c1417470ada198c167c2462dc9440
+ size 348711830
last-checkpoint/global_step198/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d3226f57d0eda0cb9f14ea96069c4660e02a76a78729fb66f1c1d15c017a929d
+ size 348711830
last-checkpoint/global_step198/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6f6e07560a2a29eb4d33bd00e84f2936176d5d1c1b3a087fcffd977c882671d
+ size 348711830
last-checkpoint/global_step198/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c1ffc0d5782cf1375cb74716ce8b3c27fc6cddbee62670e0bf468162a0e546e
+ size 348711830
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step132
+ global_step198
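
The `latest` file is the tag DeepSpeed consults when resuming, so this commit simply repoints it from global_step132 to global_step198. With the Hugging Face Trainer, resuming from this directory is a single call; in the sketch below, `trainer` is an assumption standing for the same Trainer/TrainingArguments (with the DeepSpeed config) that produced this run.

# Sketch: resume from this checkpoint with the Hugging Face Trainer.
# `trainer` is assumed to be the same Trainer (model, args, datasets, DeepSpeed
# config) that produced the run; DeepSpeed reads `latest` and restores the
# global_step198 shards automatically.
def resume(trainer):
    return trainer.train(resume_from_checkpoint="last-checkpoint")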
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0ca402835f1af6d48f2f47ac363c7097358373e395ec83d7eb3d57ddbb0a4b2d
+ oid sha256:b659790db5a549bc94a52bd0661c6c5e6c19beea5b259996f6ed9fe2149516f2
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a82bbd72da921737fac229854d3f27169eee4db7ddeacdba4a7199bad357bf3c
+ oid sha256:4fedd424305bc76c60abafd8b0806d3107fa0fd9dcab69abdd8a175961c5d292
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d5972e7fa3b67599264ff7edaf7cea513fbe8d18030796597e4baae2d425cad3
+ oid sha256:018b23a19c9fcba72d4cdfa2c9fc3962ed3bc3bd0e06e1ebeb979a60bbcca587
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b53ae0f3c148cf1921cf63943d12ae8efd4e59d00acc2c75a186e9cb04f50b9c
+ oid sha256:655e1d8eba47928d19c120d020c1358f82da6b7b643dec3c9fd55e5052edd4fe
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:84863aaa0987e7ec58181dc3d6c18d688fa5e191ef4d53a10df375a42bac5e2d
+ oid sha256:0051f5950d3497fd49cd25af996fe01c32a4128ba6dc3623a168e00768ef4bd5
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a6b191726b8c03f5e523ba50eb220f1728e82f11657f92ec30a0f367e31c0945
+ oid sha256:81af3423c5f1a9239eebc7b36cb6e6db3f9862f7b90cd7560fef2590ee1d68d0
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ae89d13f767f2c751bc315568edaf6650d050eb04a09f1b3bd9d20f069ee5007
+ oid sha256:493f5e097b662c4de6f929779988d574e0855983f464da2bbac2cf6d59691a7a
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1147b95ab7e867550d2f4e0481eddb98045dd538809f0c554423f517ebb61468
+ oid sha256:47313163e11ebbda29b8bf91fd61cb4b29fc84b8ec482325f230809ff25c6426
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dea28a2bfa00902c551f1f93e746f32ec9126cb389e7c8deda3380b1f2fec426
+ oid sha256:ced86f3a1c08cecda79ca695145fe007ebcfd4f2f8962847f6a9d9d58b4b557b
  size 1064
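
The per-rank rng_state_*.pth files and scheduler.pt hold the RNG and learning-rate-scheduler state needed for an exact resume, which is why they change on every save while keeping the same sizes (15,920 and 1,064 bytes). A quick inspection sketch follows; the exact keys depend on the scheduler class, so treat them as assumptions.

# Sketch: peek at the saved learning-rate scheduler state.
# Key names vary by scheduler implementation; "last_epoch" and "_last_lr" are
# typical for torch.optim.lr_scheduler classes but are assumptions here.
import torch

sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")
print(sorted(sched_state))
print(sched_state.get("last_epoch"), sched_state.get("_last_lr"))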
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.4049079754601227,
+ "epoch": 0.6073619631901841,
  "eval_steps": 66,
- "global_step": 132,
+ "global_step": 198,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -955,6 +955,476 @@
  "eval_samples_per_second": 1.796,
  "eval_steps_per_second": 0.126,
  "step": 132
+ },
+ {
+ "epoch": 0.40797546012269936,
+ "grad_norm": 0.2703996506167688,
+ "learning_rate": 7.29648897747144e-05,
+ "loss": 1.9516,
+ "step": 133
+ },
+ {
+ "epoch": 0.4110429447852761,
+ "grad_norm": 0.2499546230234631,
+ "learning_rate": 7.254036429817058e-05,
+ "loss": 2.0144,
+ "step": 134
+ },
+ {
+ "epoch": 0.41411042944785276,
+ "grad_norm": 0.2755759481735348,
+ "learning_rate": 7.211399001415866e-05,
+ "loss": 1.8909,
+ "step": 135
+ },
+ {
+ "epoch": 0.4171779141104294,
+ "grad_norm": 0.25578131710544816,
+ "learning_rate": 7.168581186381824e-05,
+ "loss": 1.9747,
+ "step": 136
+ },
+ {
+ "epoch": 0.42024539877300615,
+ "grad_norm": 0.27719697668216164,
+ "learning_rate": 7.12558749784219e-05,
+ "loss": 1.9548,
+ "step": 137
+ },
+ {
+ "epoch": 0.4233128834355828,
+ "grad_norm": 0.3398789070245734,
+ "learning_rate": 7.082422467461816e-05,
+ "loss": 1.9209,
+ "step": 138
+ },
+ {
+ "epoch": 0.4263803680981595,
+ "grad_norm": 0.3891484871642631,
+ "learning_rate": 7.03909064496551e-05,
+ "loss": 1.8979,
+ "step": 139
+ },
+ {
+ "epoch": 0.4294478527607362,
+ "grad_norm": 0.28744028744457395,
+ "learning_rate": 6.995596597658468e-05,
+ "loss": 1.8568,
+ "step": 140
+ },
+ {
+ "epoch": 0.4325153374233129,
+ "grad_norm": 0.465137214109235,
+ "learning_rate": 6.951944909944877e-05,
+ "loss": 1.9201,
+ "step": 141
+ },
+ {
+ "epoch": 0.43558282208588955,
+ "grad_norm": 0.26138177619827196,
+ "learning_rate": 6.908140182844695e-05,
+ "loss": 1.9864,
+ "step": 142
+ },
+ {
+ "epoch": 0.4386503067484663,
+ "grad_norm": 0.2580799320688176,
+ "learning_rate": 6.864187033508695e-05,
+ "loss": 1.9603,
+ "step": 143
+ },
+ {
+ "epoch": 0.44171779141104295,
+ "grad_norm": 0.2342374798488655,
+ "learning_rate": 6.820090094731808e-05,
+ "loss": 1.8695,
+ "step": 144
+ },
+ {
+ "epoch": 0.4447852760736196,
+ "grad_norm": 0.31939812381318156,
+ "learning_rate": 6.775854014464799e-05,
+ "loss": 1.89,
+ "step": 145
+ },
+ {
+ "epoch": 0.44785276073619634,
+ "grad_norm": 0.3745349673551468,
+ "learning_rate": 6.731483455324374e-05,
+ "loss": 1.9072,
+ "step": 146
+ },
+ {
+ "epoch": 0.450920245398773,
+ "grad_norm": 0.2398137142916484,
+ "learning_rate": 6.686983094101712e-05,
+ "loss": 1.9224,
+ "step": 147
+ },
+ {
+ "epoch": 0.4539877300613497,
+ "grad_norm": 0.7029063348936169,
+ "learning_rate": 6.642357621269535e-05,
+ "loss": 1.9042,
+ "step": 148
+ },
+ {
+ "epoch": 0.4570552147239264,
+ "grad_norm": 0.9822378439608801,
+ "learning_rate": 6.597611740487698e-05,
+ "loss": 1.9367,
+ "step": 149
+ },
+ {
+ "epoch": 0.4601226993865031,
+ "grad_norm": 0.30640641324748263,
+ "learning_rate": 6.55275016810742e-05,
+ "loss": 1.8906,
+ "step": 150
+ },
+ {
+ "epoch": 0.46319018404907975,
+ "grad_norm": 0.28453603828616697,
+ "learning_rate": 6.507777632674165e-05,
+ "loss": 1.9607,
+ "step": 151
+ },
+ {
+ "epoch": 0.4662576687116564,
+ "grad_norm": 0.6855412180718642,
+ "learning_rate": 6.462698874429239e-05,
+ "loss": 1.8572,
+ "step": 152
+ },
+ {
+ "epoch": 0.46932515337423314,
+ "grad_norm": 0.2849104974414773,
+ "learning_rate": 6.417518644810155e-05,
+ "loss": 1.9385,
+ "step": 153
+ },
+ {
+ "epoch": 0.4723926380368098,
+ "grad_norm": 0.31769414398981494,
+ "learning_rate": 6.372241705949815e-05,
+ "loss": 1.8972,
+ "step": 154
+ },
+ {
+ "epoch": 0.4754601226993865,
+ "grad_norm": 0.6853208214886923,
+ "learning_rate": 6.326872830174567e-05,
+ "loss": 1.873,
+ "step": 155
+ },
+ {
+ "epoch": 0.4785276073619632,
+ "grad_norm": 0.3810470202905365,
+ "learning_rate": 6.281416799501188e-05,
+ "loss": 2.0,
+ "step": 156
+ },
+ {
+ "epoch": 0.4815950920245399,
+ "grad_norm": 0.3784628917790679,
+ "learning_rate": 6.235878405132842e-05,
+ "loss": 1.8814,
+ "step": 157
+ },
+ {
+ "epoch": 0.48466257668711654,
+ "grad_norm": 0.3427014353184805,
+ "learning_rate": 6.190262446954085e-05,
+ "loss": 1.9223,
+ "step": 158
+ },
+ {
+ "epoch": 0.48773006134969327,
+ "grad_norm": 0.46855229041092994,
+ "learning_rate": 6.144573733024922e-05,
+ "loss": 1.9059,
+ "step": 159
+ },
+ {
+ "epoch": 0.49079754601226994,
+ "grad_norm": 0.29232827174073656,
+ "learning_rate": 6.0988170790740416e-05,
+ "loss": 1.8491,
+ "step": 160
+ },
+ {
+ "epoch": 0.4938650306748466,
+ "grad_norm": 0.30132959369450213,
+ "learning_rate": 6.052997307991214e-05,
+ "loss": 1.9595,
+ "step": 161
+ },
+ {
+ "epoch": 0.49693251533742333,
+ "grad_norm": 0.3195413242096082,
+ "learning_rate": 6.007119249318945e-05,
+ "loss": 1.9063,
+ "step": 162
+ },
+ {
+ "epoch": 0.5,
+ "grad_norm": 0.34517635749728204,
+ "learning_rate": 5.961187738743432e-05,
+ "loss": 1.9111,
+ "step": 163
+ },
+ {
+ "epoch": 0.5030674846625767,
+ "grad_norm": 0.2593428730143879,
+ "learning_rate": 5.9152076175848594e-05,
+ "loss": 1.9011,
+ "step": 164
+ },
+ {
+ "epoch": 0.5061349693251533,
+ "grad_norm": 0.31658622781595325,
+ "learning_rate": 5.86918373228712e-05,
+ "loss": 1.9918,
+ "step": 165
+ },
+ {
+ "epoch": 0.50920245398773,
+ "grad_norm": 0.6628038110211543,
+ "learning_rate": 5.8231209339069746e-05,
+ "loss": 1.9152,
+ "step": 166
+ },
+ {
+ "epoch": 0.5122699386503068,
+ "grad_norm": 0.2797312671008732,
+ "learning_rate": 5.777024077602744e-05,
+ "loss": 1.868,
+ "step": 167
+ },
+ {
+ "epoch": 0.5153374233128835,
+ "grad_norm": 0.26640093514522606,
+ "learning_rate": 5.730898022122554e-05,
+ "loss": 1.8938,
+ "step": 168
+ },
+ {
+ "epoch": 0.5184049079754601,
+ "grad_norm": 0.4054825634426873,
+ "learning_rate": 5.6847476292922155e-05,
+ "loss": 1.9428,
+ "step": 169
+ },
+ {
+ "epoch": 0.5214723926380368,
+ "grad_norm": 0.29142731230985613,
+ "learning_rate": 5.6385777635027684e-05,
+ "loss": 1.8903,
+ "step": 170
+ },
+ {
+ "epoch": 0.5245398773006135,
+ "grad_norm": 0.3511142336480421,
+ "learning_rate": 5.5923932911977575e-05,
+ "loss": 1.9386,
+ "step": 171
+ },
+ {
+ "epoch": 0.5276073619631901,
+ "grad_norm": 0.5560176165666619,
+ "learning_rate": 5.5461990803603045e-05,
+ "loss": 1.9562,
+ "step": 172
+ },
+ {
+ "epoch": 0.5306748466257669,
+ "grad_norm": 0.3171565471545065,
+ "learning_rate": 5.500000000000001e-05,
+ "loss": 1.9565,
+ "step": 173
+ },
+ {
+ "epoch": 0.5337423312883436,
+ "grad_norm": 0.29095744910567595,
+ "learning_rate": 5.4538009196396966e-05,
+ "loss": 1.9282,
+ "step": 174
+ },
+ {
+ "epoch": 0.5368098159509203,
+ "grad_norm": 0.41192796716349284,
+ "learning_rate": 5.407606708802244e-05,
+ "loss": 1.918,
+ "step": 175
+ },
+ {
+ "epoch": 0.5398773006134969,
+ "grad_norm": 0.5305521764688194,
+ "learning_rate": 5.361422236497235e-05,
+ "loss": 1.9096,
+ "step": 176
+ },
+ {
+ "epoch": 0.5429447852760736,
+ "grad_norm": 0.6434585908707302,
+ "learning_rate": 5.315252370707786e-05,
+ "loss": 1.8935,
+ "step": 177
+ },
+ {
+ "epoch": 0.5460122699386503,
+ "grad_norm": 0.2939723397914849,
+ "learning_rate": 5.2691019778774465e-05,
+ "loss": 1.9531,
+ "step": 178
+ },
+ {
+ "epoch": 0.549079754601227,
+ "grad_norm": 0.4989500512121766,
+ "learning_rate": 5.2229759223972574e-05,
+ "loss": 1.9341,
+ "step": 179
+ },
+ {
+ "epoch": 0.5521472392638037,
+ "grad_norm": 0.6024485433735285,
+ "learning_rate": 5.1768790660930265e-05,
+ "loss": 1.9001,
+ "step": 180
+ },
+ {
+ "epoch": 0.5552147239263804,
+ "grad_norm": 0.47950946229716923,
+ "learning_rate": 5.130816267712881e-05,
+ "loss": 1.9209,
+ "step": 181
+ },
+ {
+ "epoch": 0.558282208588957,
+ "grad_norm": 1.2341600337232164,
+ "learning_rate": 5.0847923824151424e-05,
+ "loss": 1.977,
+ "step": 182
+ },
+ {
+ "epoch": 0.5613496932515337,
+ "grad_norm": 0.3100804420788902,
+ "learning_rate": 5.038812261256569e-05,
+ "loss": 1.9594,
+ "step": 183
+ },
+ {
+ "epoch": 0.5644171779141104,
+ "grad_norm": 0.5390046601483737,
+ "learning_rate": 4.992880750681056e-05,
+ "loss": 1.8533,
+ "step": 184
+ },
+ {
+ "epoch": 0.5674846625766872,
+ "grad_norm": 0.43167483611230206,
+ "learning_rate": 4.9470026920087876e-05,
+ "loss": 1.8782,
+ "step": 185
+ },
+ {
+ "epoch": 0.5705521472392638,
+ "grad_norm": 0.3684508227191539,
+ "learning_rate": 4.901182920925961e-05,
+ "loss": 1.8684,
+ "step": 186
+ },
+ {
+ "epoch": 0.5736196319018405,
+ "grad_norm": 0.2936392864589777,
+ "learning_rate": 4.8554262669750794e-05,
+ "loss": 1.8586,
+ "step": 187
+ },
+ {
+ "epoch": 0.5766871165644172,
+ "grad_norm": 0.3204686860443095,
+ "learning_rate": 4.809737553045916e-05,
+ "loss": 1.8977,
+ "step": 188
+ },
+ {
+ "epoch": 0.5797546012269938,
+ "grad_norm": 0.3024045894502796,
+ "learning_rate": 4.764121594867157e-05,
+ "loss": 1.8882,
+ "step": 189
+ },
+ {
+ "epoch": 0.5828220858895705,
+ "grad_norm": 0.40522790311176354,
+ "learning_rate": 4.718583200498814e-05,
+ "loss": 1.924,
+ "step": 190
+ },
+ {
+ "epoch": 0.5858895705521472,
+ "grad_norm": 0.5053931616075322,
+ "learning_rate": 4.673127169825433e-05,
+ "loss": 1.8868,
+ "step": 191
+ },
+ {
+ "epoch": 0.588957055214724,
+ "grad_norm": 0.3211686422583536,
+ "learning_rate": 4.627758294050185e-05,
+ "loss": 1.9068,
+ "step": 192
+ },
+ {
+ "epoch": 0.5920245398773006,
+ "grad_norm": 0.24127093990601076,
+ "learning_rate": 4.582481355189846e-05,
+ "loss": 1.895,
+ "step": 193
+ },
+ {
+ "epoch": 0.5950920245398773,
+ "grad_norm": 0.4074710701692581,
+ "learning_rate": 4.537301125570763e-05,
+ "loss": 1.8969,
+ "step": 194
+ },
+ {
+ "epoch": 0.598159509202454,
+ "grad_norm": 0.25841948774460555,
+ "learning_rate": 4.492222367325837e-05,
+ "loss": 1.94,
+ "step": 195
+ },
+ {
+ "epoch": 0.6012269938650306,
+ "grad_norm": 0.2943706481314386,
+ "learning_rate": 4.447249831892583e-05,
+ "loss": 1.9482,
+ "step": 196
+ },
+ {
+ "epoch": 0.6042944785276073,
+ "grad_norm": 0.3110992148589072,
+ "learning_rate": 4.402388259512303e-05,
+ "loss": 1.9495,
+ "step": 197
+ },
+ {
+ "epoch": 0.6073619631901841,
+ "grad_norm": 0.3723312760498351,
+ "learning_rate": 4.357642378730466e-05,
+ "loss": 1.9213,
+ "step": 198
+ },
+ {
+ "epoch": 0.6073619631901841,
+ "eval_loss": 2.594010353088379,
+ "eval_runtime": 55.7716,
+ "eval_samples_per_second": 1.793,
+ "eval_steps_per_second": 0.126,
+ "step": 198
  }
  ],
  "logging_steps": 1,
@@ -974,7 +1444,7 @@
  "attributes": {}
  }
  },
- "total_flos": 144143397421056.0,
+ "total_flos": 216215096131584.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null