ZeroUniqueness committed on
Commit
896b733
·
1 Parent(s): 5a016fe

Training in progress, step 5300

Browse files
Files changed (27) hide show
  1. adapter_config.json +5 -5
  2. adapter_model.bin +1 -1
  3. {checkpoint-4900 → checkpoint-5200/adapter_model}/README.md +0 -0
  4. {checkpoint-4900 → checkpoint-5200}/adapter_model/adapter_config.json +4 -4
  5. {checkpoint-4900 → checkpoint-5200/adapter_model}/adapter_model.bin +1 -1
  6. {checkpoint-4900/adapter_model → checkpoint-5300}/README.md +0 -0
  7. {checkpoint-4900 → checkpoint-5300}/adapter_config.json +3 -3
  8. {checkpoint-4900/adapter_model → checkpoint-5300}/adapter_model.bin +1 -1
  9. {checkpoint-4900 → checkpoint-5300}/optimizer.pt +1 -1
  10. {checkpoint-4900 → checkpoint-5300}/rng_state_0.pth +1 -1
  11. {checkpoint-4900 → checkpoint-5300}/rng_state_1.pth +1 -1
  12. {checkpoint-4900 → checkpoint-5300}/rng_state_10.pth +1 -1
  13. {checkpoint-4900 → checkpoint-5300}/rng_state_11.pth +1 -1
  14. {checkpoint-4900 → checkpoint-5300}/rng_state_12.pth +1 -1
  15. {checkpoint-4900 → checkpoint-5300}/rng_state_13.pth +1 -1
  16. {checkpoint-4900 → checkpoint-5300}/rng_state_2.pth +1 -1
  17. {checkpoint-4900 → checkpoint-5300}/rng_state_3.pth +1 -1
  18. {checkpoint-4900 → checkpoint-5300}/rng_state_4.pth +1 -1
  19. {checkpoint-4900 → checkpoint-5300}/rng_state_5.pth +1 -1
  20. {checkpoint-4900 → checkpoint-5300}/rng_state_6.pth +1 -1
  21. {checkpoint-4900 → checkpoint-5300}/rng_state_7.pth +1 -1
  22. {checkpoint-4900 → checkpoint-5300}/rng_state_8.pth +1 -1
  23. {checkpoint-4900 → checkpoint-5300}/rng_state_9.pth +1 -1
  24. {checkpoint-4900 → checkpoint-5300}/scheduler.pt +1 -1
  25. {checkpoint-4900 → checkpoint-5300}/trainer_state.json +107 -3
  26. {checkpoint-4900 → checkpoint-5300}/training_args.bin +1 -1
  27. training_args.bin +1 -1
adapter_config.json CHANGED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "v_proj",
18
- "up_proj",
19
- "k_proj",
20
  "q_proj",
21
- "down_proj",
22
  "gate_proj",
23
- "o_proj"
 
 
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
17
  "q_proj",
 
18
  "gate_proj",
19
+ "o_proj",
20
+ "down_proj",
21
+ "k_proj",
22
+ "v_proj",
23
+ "up_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c2d0c51dd5ec8f66ff0bbdb8ae6185bd1eca75e6b3b3409367c09446351f5a
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ca48bd88a31002dc05f9274bc978bc8b5b8ae3e51470e74ee7bc0bb12284e9
3
  size 500897101
{checkpoint-4900 → checkpoint-5200/adapter_model}/README.md RENAMED
File without changes
{checkpoint-4900 → checkpoint-5200}/adapter_model/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
17
  "k_proj",
18
- "o_proj",
19
  "down_proj",
20
  "gate_proj",
21
- "v_proj",
22
- "q_proj",
23
- "up_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
+ "v_proj",
18
+ "up_proj",
19
  "k_proj",
20
+ "q_proj",
21
  "down_proj",
22
  "gate_proj",
23
+ "o_proj"
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-4900 → checkpoint-5200/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3bb33a0c4878b032aec2026e3879865fa62989d9939826a716eb3580fa34cd
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28c2d0c51dd5ec8f66ff0bbdb8ae6185bd1eca75e6b3b3409367c09446351f5a
3
  size 500897101
{checkpoint-4900/adapter_model → checkpoint-5300}/README.md RENAMED
File without changes
{checkpoint-4900 → checkpoint-5300}/adapter_config.json RENAMED
@@ -14,12 +14,12 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "k_proj",
 
18
  "o_proj",
19
  "down_proj",
20
- "gate_proj",
21
  "v_proj",
22
- "q_proj",
23
  "up_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
+ "q_proj",
18
+ "gate_proj",
19
  "o_proj",
20
  "down_proj",
21
+ "k_proj",
22
  "v_proj",
 
23
  "up_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
{checkpoint-4900/adapter_model → checkpoint-5300}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3bb33a0c4878b032aec2026e3879865fa62989d9939826a716eb3580fa34cd
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ca48bd88a31002dc05f9274bc978bc8b5b8ae3e51470e74ee7bc0bb12284e9
3
  size 500897101
{checkpoint-4900 → checkpoint-5300}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89baee7be101e0977ca1b6a39db7917193704b00b2bc0dc4bbf1b2e60e7e6a22
3
  size 1001752701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:869a51ca719bee8b6aab465be97fa4ac5e228c769321c2712f644067ceeca076
3
  size 1001752701
{checkpoint-4900 → checkpoint-5300}/rng_state_0.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d2fb5eb561d4b92c89062c2939861ac27d199067133d2975e1e2aecd6254425
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9267467db4bfc0b62f4b2992b98c478568d6b740025cdb5016f4102da1504163
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_1.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:845f7f7308d4eb8331386ad93aebcacf5679b5a7f2b26d79e0e881c33768d801
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea0db0e586ec1c1e547243b2e02d8b0d760f3d113e51420b567322fb80b86283
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_10.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac19165b3d0d8492404b2597a692a25174dc25bbebee0818cc70fb91e7fa66a9
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcfb12512c0be896f0842434c14afaa23ede7d6aadbd83bfd18570859fcdecdd
3
  size 27789
{checkpoint-4900 → checkpoint-5300}/rng_state_11.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ba175f65490c813c43be1b8e90c05273b365a5fc35034a39d3e40171b6649d8
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd57397386863534e25c2052e0eb5a3986965528e8332bead4e6da6c8a52a60
3
  size 27789
{checkpoint-4900 → checkpoint-5300}/rng_state_12.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff1e6d6959816301feff7a9806d0ca028fe2a211e238b9dca8fd390289758baa
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b494220b086ab33c457cd9cb3298d5707ae27861e1a1ea15cf7b2846c34edb3
3
  size 27789
{checkpoint-4900 → checkpoint-5300}/rng_state_13.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1211e56e1d3e784cd9f700dcd2e4b8d94627474333f9ea95573cbff916f6590
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66ddbbc44b73707a46dbe70a821775b9215bfb3188b97867dfd5d788f2c4bd1b
3
  size 27789
{checkpoint-4900 → checkpoint-5300}/rng_state_2.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a4a16265fbd33a987004f285a9ac8e9d11fa74e2f1e1ebfae43b07d355feb1a
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e09271dc29724c758580e8e6864e76555b7a1dd95ca5d0036f08cdd9f67fbaec
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_3.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25be8bdc125a49f8faa5491285fe8c51cbd061cd60a12c31e1d294969eaeed1b
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65b0c519b8283902066db24d39bebbf0ec0f57d632779c56be9c9d58f7cdb78e
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_4.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8edc43261484ceb88153a0cf622a2f1d79ae2bd2a22d2cfc258fbc4d67c9fcf
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:868a6a26d82f017652ee4b9e21c96ec36ab5f96be4eeedf65e27eda71ff93f25
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_5.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3c01adf24a8fb5ced49e768a00799034bf23fdbe170faf373d7bffc71cf330e
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d82c47d40cdc9cbed8bc1b1f8fdcb354fb8b6ac525cfce07fab2d1af5791c195
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_6.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1431fdce90c9dffa41af7e45225d7e937f8a892f36234dd2c9c0c5ea825d344b
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64d8ec328ebab19d4fa69241bbb5de66af7b09aa5f0d71d9e37be38a5170d24e
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_7.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:698169f9adbf09c9e392876211e57228599d852964332781ccccb91b02c99668
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9d89caef032992b8f5efcdaf7ab987e37e4fb52ec8d47520f814092e1e7ab3
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_8.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e12b62affc946cb152ae7675f1074ed4a8c28a930020230d21a02278a8cfc66
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8913ebfea32419c486eaa9895c49a7962a44933cd37f4965710a3560fe737c25
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/rng_state_9.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d0857929e84e3ee4f8ead280fd3f99826ceb6e6e749cce250845a12d23d0cc7
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:685f43d3417ce77333bd61cb9e79c3d6e0421bb659bae8689caafc6c6b0d8752
3
  size 27772
{checkpoint-4900 → checkpoint-5300}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fce2b4a13969f53fdf68394680e1b829aa39975e2753c73d5d093518e2315b1c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d904fb502ced275ef5e7925781f477f4c89d48daa60e9f45b9339adf1aaa77d
3
  size 627
{checkpoint-4900 → checkpoint-5300}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8999612252811167,
5
- "global_step": 4900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1190,11 +1190,115 @@
1190
  "learning_rate": 5.946628610250484e-05,
1191
  "loss": 0.7918,
1192
  "step": 4900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1193
  }
1194
  ],
1195
  "max_steps": 7737,
1196
  "num_train_epochs": 3,
1197
- "total_flos": 2.1101658419601867e+19,
1198
  "trial_name": null,
1199
  "trial_params": null
1200
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.055060100814269,
5
+ "global_step": 5300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1190
  "learning_rate": 5.946628610250484e-05,
1191
  "loss": 0.7918,
1192
  "step": 4900
1193
+ },
1194
+ {
1195
+ "epoch": 1.91,
1196
+ "learning_rate": 5.853920568929996e-05,
1197
+ "loss": 0.7921,
1198
+ "step": 4925
1199
+ },
1200
+ {
1201
+ "epoch": 1.92,
1202
+ "learning_rate": 5.761640870750799e-05,
1203
+ "loss": 0.7878,
1204
+ "step": 4950
1205
+ },
1206
+ {
1207
+ "epoch": 1.93,
1208
+ "learning_rate": 5.669799049388375e-05,
1209
+ "loss": 0.7901,
1210
+ "step": 4975
1211
+ },
1212
+ {
1213
+ "epoch": 1.94,
1214
+ "learning_rate": 5.578404593279911e-05,
1215
+ "loss": 0.7858,
1216
+ "step": 5000
1217
+ },
1218
+ {
1219
+ "epoch": 1.94,
1220
+ "eval_loss": 0.807844877243042,
1221
+ "eval_runtime": 59.586,
1222
+ "eval_samples_per_second": 12.251,
1223
+ "eval_steps_per_second": 0.889,
1224
+ "step": 5000
1225
+ },
1226
+ {
1227
+ "epoch": 1.95,
1228
+ "learning_rate": 5.487466944644033e-05,
1229
+ "loss": 0.7902,
1230
+ "step": 5025
1231
+ },
1232
+ {
1233
+ "epoch": 1.96,
1234
+ "learning_rate": 5.3969954985052996e-05,
1235
+ "loss": 0.7979,
1236
+ "step": 5050
1237
+ },
1238
+ {
1239
+ "epoch": 1.97,
1240
+ "learning_rate": 5.306999601723579e-05,
1241
+ "loss": 0.7931,
1242
+ "step": 5075
1243
+ },
1244
+ {
1245
+ "epoch": 1.98,
1246
+ "learning_rate": 5.21748855202839e-05,
1247
+ "loss": 0.7868,
1248
+ "step": 5100
1249
+ },
1250
+ {
1251
+ "epoch": 1.99,
1252
+ "learning_rate": 5.128471597058342e-05,
1253
+ "loss": 0.7993,
1254
+ "step": 5125
1255
+ },
1256
+ {
1257
+ "epoch": 2.0,
1258
+ "learning_rate": 5.03995793340572e-05,
1259
+ "loss": 0.7892,
1260
+ "step": 5150
1261
+ },
1262
+ {
1263
+ "epoch": 2.01,
1264
+ "learning_rate": 4.9519567056663694e-05,
1265
+ "loss": 0.7788,
1266
+ "step": 5175
1267
+ },
1268
+ {
1269
+ "epoch": 2.02,
1270
+ "learning_rate": 4.864477005494938e-05,
1271
+ "loss": 0.7654,
1272
+ "step": 5200
1273
+ },
1274
+ {
1275
+ "epoch": 2.03,
1276
+ "learning_rate": 4.777527870665592e-05,
1277
+ "loss": 0.7468,
1278
+ "step": 5225
1279
+ },
1280
+ {
1281
+ "epoch": 2.04,
1282
+ "learning_rate": 4.691118284138296e-05,
1283
+ "loss": 0.7359,
1284
+ "step": 5250
1285
+ },
1286
+ {
1287
+ "epoch": 2.05,
1288
+ "learning_rate": 4.605257173130763e-05,
1289
+ "loss": 0.7422,
1290
+ "step": 5275
1291
+ },
1292
+ {
1293
+ "epoch": 2.06,
1294
+ "learning_rate": 4.519953408196152e-05,
1295
+ "loss": 0.7424,
1296
+ "step": 5300
1297
  }
1298
  ],
1299
  "max_steps": 7737,
1300
  "num_train_epochs": 3,
1301
+ "total_flos": 2.282482987033428e+19,
1302
  "trial_name": null,
1303
  "trial_params": null
1304
  }
{checkpoint-4900 → checkpoint-5300}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:354c4769acaf21317d8ab58ba2dd133763f3bee1aec7488fb334e45f3e80ebc3
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:689d52379bcc7c50e04c40b22a97b473b8de3f17b4096bebf81eb9f37e1dafa6
3
  size 4027
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5cc18faa1420e425c2fed06bfe2dd967461487c15531bd94429b7a3c0c02a49
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:689d52379bcc7c50e04c40b22a97b473b8de3f17b4096bebf81eb9f37e1dafa6
3
  size 4027