ZeroUniqueness committed on
Commit
62bf125
·
1 Parent(s): 8d2b789

Training in progress, step 5000

Browse files
Files changed (25) hide show
  1. adapter_model.bin +1 -1
  2. {checkpoint-4600 → checkpoint-4900/adapter_model}/README.md +0 -0
  3. {checkpoint-4600 → checkpoint-4900/adapter_model}/adapter_config.json +4 -4
  4. {checkpoint-4600 → checkpoint-4900/adapter_model}/adapter_model.bin +1 -1
  5. {checkpoint-4600/adapter_model → checkpoint-5000}/README.md +0 -0
  6. {checkpoint-4600/adapter_model → checkpoint-5000}/adapter_config.json +4 -4
  7. {checkpoint-4600/adapter_model → checkpoint-5000}/adapter_model.bin +1 -1
  8. {checkpoint-4600 → checkpoint-5000}/optimizer.pt +1 -1
  9. {checkpoint-4600 → checkpoint-5000}/rng_state_0.pth +1 -1
  10. {checkpoint-4600 → checkpoint-5000}/rng_state_1.pth +1 -1
  11. {checkpoint-4600 → checkpoint-5000}/rng_state_10.pth +1 -1
  12. {checkpoint-4600 → checkpoint-5000}/rng_state_11.pth +1 -1
  13. {checkpoint-4600 → checkpoint-5000}/rng_state_12.pth +1 -1
  14. {checkpoint-4600 → checkpoint-5000}/rng_state_13.pth +1 -1
  15. {checkpoint-4600 → checkpoint-5000}/rng_state_2.pth +1 -1
  16. {checkpoint-4600 → checkpoint-5000}/rng_state_3.pth +1 -1
  17. {checkpoint-4600 → checkpoint-5000}/rng_state_4.pth +1 -1
  18. {checkpoint-4600 → checkpoint-5000}/rng_state_5.pth +1 -1
  19. {checkpoint-4600 → checkpoint-5000}/rng_state_6.pth +1 -1
  20. {checkpoint-4600 → checkpoint-5000}/rng_state_7.pth +1 -1
  21. {checkpoint-4600 → checkpoint-5000}/rng_state_8.pth +1 -1
  22. {checkpoint-4600 → checkpoint-5000}/rng_state_9.pth +1 -1
  23. {checkpoint-4600 → checkpoint-5000}/scheduler.pt +1 -1
  24. {checkpoint-4600 → checkpoint-5000}/trainer_state.json +107 -3
  25. {checkpoint-4600 → checkpoint-5000}/training_args.bin +1 -1
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3bb33a0c4878b032aec2026e3879865fa62989d9939826a716eb3580fa34cd
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:658b9b2dec5afef11956f93a69d1e5899dfaf7ec45314dbb9a4f4fe9a8d341ef
3
  size 500897101
{checkpoint-4600 → checkpoint-4900/adapter_model}/README.md RENAMED
File without changes
{checkpoint-4600 → checkpoint-4900/adapter_model}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
17
  "gate_proj",
18
  "v_proj",
19
- "k_proj",
20
  "q_proj",
21
- "up_proj",
22
- "o_proj",
23
- "down_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
+ "k_proj",
18
+ "o_proj",
19
+ "down_proj",
20
  "gate_proj",
21
  "v_proj",
 
22
  "q_proj",
23
+ "up_proj"
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-4600 → checkpoint-4900/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68baca464c45ad2c5da146b800b15dadf06eb685f6905c3cba44f31d2963c595
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a3bb33a0c4878b032aec2026e3879865fa62989d9939826a716eb3580fa34cd
3
  size 500897101
{checkpoint-4600/adapter_model → checkpoint-5000}/README.md RENAMED
File without changes
{checkpoint-4600/adapter_model → checkpoint-5000}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
17
  "gate_proj",
18
  "v_proj",
19
- "k_proj",
20
  "q_proj",
21
- "up_proj",
22
- "o_proj",
23
- "down_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
+ "k_proj",
18
+ "o_proj",
19
+ "down_proj",
20
  "gate_proj",
21
  "v_proj",
 
22
  "q_proj",
23
+ "up_proj"
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-4600/adapter_model → checkpoint-5000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68baca464c45ad2c5da146b800b15dadf06eb685f6905c3cba44f31d2963c595
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:658b9b2dec5afef11956f93a69d1e5899dfaf7ec45314dbb9a4f4fe9a8d341ef
3
  size 500897101
{checkpoint-4600 → checkpoint-5000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fce4744c6f9f99ebf88811d26dadf3b80e0f50d32d46161fc773f1754ac501b
3
  size 1001752701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41a19364f98039f3de4a4fce0e0896ae1c403fab5b0de5a6478b8b3154215366
3
  size 1001752701
{checkpoint-4600 → checkpoint-5000}/rng_state_0.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87cf73a108581136c28003995848a3469b34884b54056edbd96d9426b19286d4
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3591569acf36220e21cfa3f00c334d4afa5214c3fa0f7f20b6081190208a2ce
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_1.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ed08fdd87a9eb079576d2d2e79f82d8d181e81e5ea1aaaea16afb65f918ed37
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a3d8da1aa54796771be8e0f102c1ba0e9bce209cd7f83f5e4242dbd0a2bfb18
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_10.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5231e38f102d1b091797b4cb63ddc5adec05d33a2851436dd68077cbb0064d28
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:263b9c81bf6b52324cce079cb862bf0c4c6c076726306fe430175760cf77ce1d
3
  size 27789
{checkpoint-4600 → checkpoint-5000}/rng_state_11.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69f01b3fcb68dbb72dd1357f58bcfac99842616b87f82438c24f947602cd918b
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd9622dd9c9e0f101a3891d78b76d8a750232389d97fff8a48e4ab0113e3bf0
3
  size 27789
{checkpoint-4600 → checkpoint-5000}/rng_state_12.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2bd26cbc1398ca2ebd540399d0c4d6338ed464d548af7c1038f852ca6219db
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b52b3225a4c307e1e07d040be377b2268078fb40662cf123cf14c9225dfc3d2
3
  size 27789
{checkpoint-4600 → checkpoint-5000}/rng_state_13.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:858e20e80f50042ced11348b07f992a32041d7732ed58a017be4527ed0a13118
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7022a9a5c490e593972113d5eec0a181e018cdd0b74c023342974a3d2a43471
3
  size 27789
{checkpoint-4600 → checkpoint-5000}/rng_state_2.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d968dcc8ca9c991c99a196e8d92215f20f736625130c876c9b64041bce288b6d
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea427538851652ebb74224ead7e27e9853d1a0254ab5742c7f4e6a75f1a155b
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_3.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9350788e0e90e8024f2d9ed4e04b572e4fb0c1a913ae77e1c4e4f233867609d0
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c0d3bf83700c5dcf78d4bca3c01d4661af97db85b1e1021458aa4ebf191dc3
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_4.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e65ae4aaa3c776537f2bf5c5785311caf3d273bfe047df490befdca5b8e26247
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b40363540e71d8f1d1cd8dbf6a880e5a85fe8c36554f2c6a6e6bf87eeee7ef
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_5.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22bdddfd4edfea8b430f652448271c5f5d87f3505a772758c6417e8ff98655c4
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f452a098fe1f3d5946e80ca1866e99d8de678f832a877b5bd605c904adccd168
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_6.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a2c49429dcbfeca1faa7a663803a0fdae88ad305b95d23e46226445395b9946
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6329d34fc46a440591bbc38dc3e735aa628275c46bc5f5d67ea60d2ac817c9f
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_7.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9c7570bf926e320eb7895bf959a53e1980cb414ed8bc39b4eda9b9478584053
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abc478444408d585c8510980bf6c3724dbd04c62fe3f44191f641810f4f2b408
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_8.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb6fa0fe38db2df9eb256d09e9da98351346be62c7aab5a72c2a011d6913561a
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a4f27fdaf452bec4190380281039b2452b0cc819d9babdb1883bb6338b79df
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/rng_state_9.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ed3fe525bbbbe135513154ac6a01e1c556fe2d6d16f9ab7210f0476687c9b5e
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd492bc5d4f48c5532176ee3ad99275b5bfcd315a528c245cfa8d46477f6b13e
3
  size 27772
{checkpoint-4600 → checkpoint-5000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc6ea8a6b4cf0316e8365ff2c59039288c5d56c6df930ea3adced9242e298fc
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091fab5b44662db384d56b3c50422b403065ff0f62fb64f68c521ac3c38ad752
3
  size 627
{checkpoint-4600 → checkpoint-5000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7836370686312524,
5
- "global_step": 4600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1118,11 +1118,115 @@
1118
  "learning_rate": 7.089008200684197e-05,
1119
  "loss": 0.7982,
1120
  "step": 4600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121
  }
1122
  ],
1123
  "max_steps": 7737,
1124
  "num_train_epochs": 3,
1125
- "total_flos": 1.980912103157924e+19,
1126
  "trial_name": null,
1127
  "trial_params": null
1128
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9387359441644048,
5
+ "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1118
  "learning_rate": 7.089008200684197e-05,
1119
  "loss": 0.7982,
1120
  "step": 4600
1121
+ },
1122
+ {
1123
+ "epoch": 1.79,
1124
+ "learning_rate": 6.991918777418928e-05,
1125
+ "loss": 0.7916,
1126
+ "step": 4625
1127
+ },
1128
+ {
1129
+ "epoch": 1.8,
1130
+ "learning_rate": 6.895140127495455e-05,
1131
+ "loss": 0.7919,
1132
+ "step": 4650
1133
+ },
1134
+ {
1135
+ "epoch": 1.81,
1136
+ "learning_rate": 6.798682249388631e-05,
1137
+ "loss": 0.7863,
1138
+ "step": 4675
1139
+ },
1140
+ {
1141
+ "epoch": 1.82,
1142
+ "learning_rate": 6.702555108433461e-05,
1143
+ "loss": 0.789,
1144
+ "step": 4700
1145
+ },
1146
+ {
1147
+ "epoch": 1.83,
1148
+ "learning_rate": 6.606768635795574e-05,
1149
+ "loss": 0.7902,
1150
+ "step": 4725
1151
+ },
1152
+ {
1153
+ "epoch": 1.84,
1154
+ "learning_rate": 6.511332727445191e-05,
1155
+ "loss": 0.7924,
1156
+ "step": 4750
1157
+ },
1158
+ {
1159
+ "epoch": 1.85,
1160
+ "learning_rate": 6.416257243134747e-05,
1161
+ "loss": 0.7957,
1162
+ "step": 4775
1163
+ },
1164
+ {
1165
+ "epoch": 1.86,
1166
+ "learning_rate": 6.321552005380256e-05,
1167
+ "loss": 0.7916,
1168
+ "step": 4800
1169
+ },
1170
+ {
1171
+ "epoch": 1.87,
1172
+ "learning_rate": 6.22722679844652e-05,
1173
+ "loss": 0.7867,
1174
+ "step": 4825
1175
+ },
1176
+ {
1177
+ "epoch": 1.88,
1178
+ "learning_rate": 6.133291367336284e-05,
1179
+ "loss": 0.7944,
1180
+ "step": 4850
1181
+ },
1182
+ {
1183
+ "epoch": 1.89,
1184
+ "learning_rate": 6.039755416783457e-05,
1185
+ "loss": 0.7982,
1186
+ "step": 4875
1187
+ },
1188
+ {
1189
+ "epoch": 1.9,
1190
+ "learning_rate": 5.946628610250484e-05,
1191
+ "loss": 0.7918,
1192
+ "step": 4900
1193
+ },
1194
+ {
1195
+ "epoch": 1.91,
1196
+ "learning_rate": 5.853920568929996e-05,
1197
+ "loss": 0.7921,
1198
+ "step": 4925
1199
+ },
1200
+ {
1201
+ "epoch": 1.92,
1202
+ "learning_rate": 5.761640870750799e-05,
1203
+ "loss": 0.7878,
1204
+ "step": 4950
1205
+ },
1206
+ {
1207
+ "epoch": 1.93,
1208
+ "learning_rate": 5.669799049388375e-05,
1209
+ "loss": 0.7901,
1210
+ "step": 4975
1211
+ },
1212
+ {
1213
+ "epoch": 1.94,
1214
+ "learning_rate": 5.578404593279911e-05,
1215
+ "loss": 0.7858,
1216
+ "step": 5000
1217
+ },
1218
+ {
1219
+ "epoch": 1.94,
1220
+ "eval_loss": 0.807844877243042,
1221
+ "eval_runtime": 59.586,
1222
+ "eval_samples_per_second": 12.251,
1223
+ "eval_steps_per_second": 0.889,
1224
+ "step": 5000
1225
  }
1226
  ],
1227
  "max_steps": 7737,
1228
  "num_train_epochs": 3,
1229
+ "total_flos": 2.1532695573582316e+19,
1230
  "trial_name": null,
1231
  "trial_params": null
1232
  }
{checkpoint-4600 → checkpoint-5000}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4458bf553277b063d908ed9668b95abc04892c52d8d793f6b007433394d06f17
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:354c4769acaf21317d8ab58ba2dd133763f3bee1aec7488fb334e45f3e80ebc3
3
  size 4027