ZeroUniqueness committed on
Commit
3ca1cd1
•
1 Parent(s): 39da2b1
Files changed (50) hide show
  1. README.md +13 -0
  2. adapter_model.bin +1 -1
  3. checkpoint-77500/adapter_model.bin +0 -3
  4. checkpoint-77500/adapter_model/adapter_model.bin +0 -3
  5. checkpoint-78000/adapter_model.bin +0 -3
  6. checkpoint-78000/adapter_model/adapter_model.bin +0 -3
  7. {checkpoint-76500 → checkpoint-78500}/README.md +0 -0
  8. {checkpoint-76500 → checkpoint-78500}/adapter_config.json +0 -0
  9. {checkpoint-76500 → checkpoint-78500}/adapter_model.bin +1 -1
  10. {checkpoint-76500 → checkpoint-78500}/adapter_model/README.md +0 -0
  11. {checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_config.json +0 -0
  12. {checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_model.bin +1 -1
  13. {checkpoint-77000 → checkpoint-78500}/optimizer.pt +1 -1
  14. {checkpoint-77000 → checkpoint-78500}/rng_state.pth +1 -1
  15. {checkpoint-77500 → checkpoint-78500}/scheduler.pt +1 -1
  16. {checkpoint-78000 → checkpoint-78500}/trainer_state.json +19 -5
  17. {checkpoint-76500 → checkpoint-78500}/training_args.bin +0 -0
  18. {checkpoint-77000 → checkpoint-79000}/README.md +0 -0
  19. {checkpoint-77000 → checkpoint-79000}/adapter_config.json +0 -0
  20. {checkpoint-77000 → checkpoint-79000}/adapter_model.bin +1 -1
  21. {checkpoint-77000 → checkpoint-79000}/adapter_model/README.md +0 -0
  22. {checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_config.json +0 -0
  23. {checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_model.bin +1 -1
  24. {checkpoint-77500 → checkpoint-79000}/optimizer.pt +1 -1
  25. {checkpoint-77500 → checkpoint-79000}/rng_state.pth +1 -1
  26. {checkpoint-77000 → checkpoint-79000}/scheduler.pt +1 -1
  27. {checkpoint-77500 → checkpoint-79000}/trainer_state.json +47 -5
  28. {checkpoint-77000 → checkpoint-79000}/training_args.bin +0 -0
  29. {checkpoint-77500 → checkpoint-79500}/README.md +0 -0
  30. {checkpoint-77500 → checkpoint-79500}/adapter_config.json +0 -0
  31. checkpoint-79500/adapter_model.bin +3 -0
  32. {checkpoint-77500 → checkpoint-79500}/adapter_model/README.md +0 -0
  33. {checkpoint-77500 → checkpoint-79500}/adapter_model/adapter_config.json +0 -0
  34. checkpoint-79500/adapter_model/adapter_model.bin +3 -0
  35. {checkpoint-76500 → checkpoint-79500}/optimizer.pt +1 -1
  36. {checkpoint-78000 → checkpoint-79500}/rng_state.pth +1 -1
  37. {checkpoint-76500 → checkpoint-79500}/scheduler.pt +1 -1
  38. {checkpoint-77000 → checkpoint-79500}/trainer_state.json +75 -5
  39. {checkpoint-77500 → checkpoint-79500}/training_args.bin +0 -0
  40. {checkpoint-78000 → checkpoint-80000}/README.md +0 -0
  41. {checkpoint-78000 → checkpoint-80000}/adapter_config.json +0 -0
  42. checkpoint-80000/adapter_model.bin +3 -0
  43. {checkpoint-78000 → checkpoint-80000}/adapter_model/README.md +0 -0
  44. {checkpoint-78000 → checkpoint-80000}/adapter_model/adapter_config.json +0 -0
  45. checkpoint-80000/adapter_model/adapter_model.bin +3 -0
  46. {checkpoint-78000 → checkpoint-80000}/optimizer.pt +1 -1
  47. {checkpoint-76500 → checkpoint-80000}/rng_state.pth +1 -1
  48. {checkpoint-78000 → checkpoint-80000}/scheduler.pt +1 -1
  49. {checkpoint-76500 → checkpoint-80000}/trainer_state.json +103 -5
  50. {checkpoint-78000 → checkpoint-80000}/training_args.bin +0 -0
README.md CHANGED
@@ -4,6 +4,18 @@ library_name: peft
4
  ## Training procedure
5
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  The following `bitsandbytes` quantization config was used during training:
8
  - load_in_8bit: False
9
  - load_in_4bit: True
@@ -76,5 +88,6 @@ The following `bitsandbytes` quantization config was used during training:
76
  - PEFT 0.5.0.dev0
77
  - PEFT 0.5.0.dev0
78
  - PEFT 0.5.0.dev0
 
79
 
80
  - PEFT 0.5.0.dev0
 
4
  ## Training procedure
5
 
6
 
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+
19
  The following `bitsandbytes` quantization config was used during training:
20
  - load_in_8bit: False
21
  - load_in_4bit: True
 
88
  - PEFT 0.5.0.dev0
89
  - PEFT 0.5.0.dev0
90
  - PEFT 0.5.0.dev0
91
+ - PEFT 0.5.0.dev0
92
 
93
  - PEFT 0.5.0.dev0
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63171e37485368f4ae8c7a1aa1cb52caf8554a5c9b41c606ea3445d147683d5
3
  size 500897101
checkpoint-77500/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
3
- size 500897101
 
 
 
 
checkpoint-77500/adapter_model/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a884eb86c6d63307c9acb52dd6a2b1b5697accc52a54a952355462a2d1ff4bb1
3
- size 500897101
 
 
 
 
checkpoint-78000/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
3
- size 500897101
 
 
 
 
checkpoint-78000/adapter_model/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a95e49326f95cbf72dea87596b1fd0bcc1abcccd06310612c0ac6bdb3314bc1
3
- size 500897101
 
 
 
 
{checkpoint-76500 → checkpoint-78500}/README.md RENAMED
File without changes
{checkpoint-76500 → checkpoint-78500}/adapter_config.json RENAMED
File without changes
{checkpoint-76500 → checkpoint-78500}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aac0f31027f89872a0b3f8a0a220bec95e100567f0e22fd44826f62a28c01a6
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63171e37485368f4ae8c7a1aa1cb52caf8554a5c9b41c606ea3445d147683d5
3
  size 500897101
{checkpoint-76500 → checkpoint-78500}/adapter_model/README.md RENAMED
File without changes
{checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_config.json RENAMED
File without changes
{checkpoint-76500 → checkpoint-78500}/adapter_model/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aac0f31027f89872a0b3f8a0a220bec95e100567f0e22fd44826f62a28c01a6
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63171e37485368f4ae8c7a1aa1cb52caf8554a5c9b41c606ea3445d147683d5
3
  size 500897101
{checkpoint-77000 → checkpoint-78500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b09958ee930a5c1cdb447dc5ca98b44b0a8ac3e23351c47128a6daf915aa3809
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b94c97020101dfe8988e5cf3a67d34dc5cd78e5e4685fc7e9f6428c508ff53ce
3
  size 1001724605
{checkpoint-77000 → checkpoint-78500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f162395a6f7d2e1af70b53e882440048027967f43d5301d750609c6c591e4ca3
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b76070aa3a97b4296cfe30e77dcd74707dea42092548a8b07d9605050d31f3af
3
  size 14575
{checkpoint-77500 → checkpoint-78500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea97f91717d4e226e2157501dd3a83bc130311da389f901511bd22351a008c26
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0b4a7a47b7400044f7b7933b36f7c700f5c879afac5320d98411ec041812fdc
3
  size 627
{checkpoint-78000 → checkpoint-78500}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4757947325706482,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-78000",
4
- "epoch": 2.908168972074121,
5
  "eval_steps": 500,
6
- "global_step": 78000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4551,13 +4551,27 @@
4551
  "eval_samples_per_second": 0.427,
4552
  "eval_steps_per_second": 0.427,
4553
  "step": 78000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4554
  }
4555
  ],
4556
  "logging_steps": 500,
4557
  "max_steps": 80463,
4558
  "num_train_epochs": 3,
4559
  "save_steps": 500,
4560
- "total_flos": 2.1910553027265577e+19,
4561
  "trial_name": null,
4562
  "trial_params": null
4563
  }
 
1
  {
2
+ "best_metric": 0.47572794556617737,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-78500",
4
+ "epoch": 2.926811080869468,
5
  "eval_steps": 500,
6
+ "global_step": 78500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4551
  "eval_samples_per_second": 0.427,
4552
  "eval_steps_per_second": 0.427,
4553
  "step": 78000
4554
+ },
4555
+ {
4556
+ "epoch": 2.93,
4557
+ "learning_rate": 2.9363864105907967e-07,
4558
+ "loss": 0.3633,
4559
+ "step": 78500
4560
+ },
4561
+ {
4562
+ "epoch": 2.93,
4563
+ "eval_loss": 0.47572794556617737,
4564
+ "eval_runtime": 1284.2805,
4565
+ "eval_samples_per_second": 0.422,
4566
+ "eval_steps_per_second": 0.422,
4567
+ "step": 78500
4568
  }
4569
  ],
4570
  "logging_steps": 500,
4571
  "max_steps": 80463,
4572
  "num_train_epochs": 3,
4573
  "save_steps": 500,
4574
+ "total_flos": 2.205083454877704e+19,
4575
  "trial_name": null,
4576
  "trial_params": null
4577
  }
{checkpoint-76500 → checkpoint-78500}/training_args.bin RENAMED
File without changes
{checkpoint-77000 → checkpoint-79000}/README.md RENAMED
File without changes
{checkpoint-77000 → checkpoint-79000}/adapter_config.json RENAMED
File without changes
{checkpoint-77000 → checkpoint-79000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1427c39ed89366176a26d42309badaecc4eaac4173d3df01371a38612540cbce
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54cdaa3b50871ca412658af6dafb529a52b86753ff9526fee0e1b74954640aa4
3
  size 500897101
{checkpoint-77000 → checkpoint-79000}/adapter_model/README.md RENAMED
File without changes
{checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_config.json RENAMED
File without changes
{checkpoint-77000 → checkpoint-79000}/adapter_model/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1427c39ed89366176a26d42309badaecc4eaac4173d3df01371a38612540cbce
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54cdaa3b50871ca412658af6dafb529a52b86753ff9526fee0e1b74954640aa4
3
  size 500897101
{checkpoint-77500 → checkpoint-79000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21a191193d80b976badbd98b5f53b496006b6f2a29af8b6d8dca0b1e0b7ecbe4
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d0a52aeee43e33265bda6c05b10da12477313f3f2c4ef29276aa29bb0cd412
3
  size 1001724605
{checkpoint-77500 → checkpoint-79000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:077d679486328cc243c50097dc890c036a89503397938038ef9689bd7097c327
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac837ea1eac58f4aaf90b3c888590c6a34139d2391df0c00f93ad613d9d2fe4
3
  size 14575
{checkpoint-77000 → checkpoint-79000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35a18202efac039c56b779ce26337552adb710311faff67d76d05cf3142d22af
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:818c4b15e458686761dbc223e6f958ab0a422ba783d422c1d2eee54f27b9caa2
3
  size 627
{checkpoint-77500 → checkpoint-79000}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.47582224011421204,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-77000",
4
- "epoch": 2.889526863278774,
5
  "eval_steps": 500,
6
- "global_step": 77500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4537,13 +4537,55 @@
4537
  "eval_samples_per_second": 0.413,
4538
  "eval_steps_per_second": 0.413,
4539
  "step": 77500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4540
  }
4541
  ],
4542
  "logging_steps": 500,
4543
  "max_steps": 80463,
4544
  "num_train_epochs": 3,
4545
  "save_steps": 500,
4546
- "total_flos": 2.1768185355260805e+19,
4547
  "trial_name": null,
4548
  "trial_params": null
4549
  }
 
1
  {
2
+ "best_metric": 0.47572794556617737,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-78500",
4
+ "epoch": 2.945453189664815,
5
  "eval_steps": 500,
6
+ "global_step": 79000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4537
  "eval_samples_per_second": 0.413,
4538
  "eval_steps_per_second": 0.413,
4539
  "step": 77500
4540
+ },
4541
+ {
4542
+ "epoch": 2.91,
4543
+ "learning_rate": 4.6214609844061894e-07,
4544
+ "loss": 0.3696,
4545
+ "step": 78000
4546
+ },
4547
+ {
4548
+ "epoch": 2.91,
4549
+ "eval_loss": 0.4757947325706482,
4550
+ "eval_runtime": 1268.9631,
4551
+ "eval_samples_per_second": 0.427,
4552
+ "eval_steps_per_second": 0.427,
4553
+ "step": 78000
4554
+ },
4555
+ {
4556
+ "epoch": 2.93,
4557
+ "learning_rate": 2.9363864105907967e-07,
4558
+ "loss": 0.3633,
4559
+ "step": 78500
4560
+ },
4561
+ {
4562
+ "epoch": 2.93,
4563
+ "eval_loss": 0.47572794556617737,
4564
+ "eval_runtime": 1284.2805,
4565
+ "eval_samples_per_second": 0.422,
4566
+ "eval_steps_per_second": 0.422,
4567
+ "step": 78500
4568
+ },
4569
+ {
4570
+ "epoch": 2.95,
4571
+ "learning_rate": 1.6313824917496555e-07,
4572
+ "loss": 0.3712,
4573
+ "step": 79000
4574
+ },
4575
+ {
4576
+ "epoch": 2.95,
4577
+ "eval_loss": 0.47579219937324524,
4578
+ "eval_runtime": 1333.9827,
4579
+ "eval_samples_per_second": 0.406,
4580
+ "eval_steps_per_second": 0.406,
4581
+ "step": 79000
4582
  }
4583
  ],
4584
  "logging_steps": 500,
4585
  "max_steps": 80463,
4586
  "num_train_epochs": 3,
4587
  "save_steps": 500,
4588
+ "total_flos": 2.219205598424703e+19,
4589
  "trial_name": null,
4590
  "trial_params": null
4591
  }
{checkpoint-77000 → checkpoint-79000}/training_args.bin RENAMED
File without changes
{checkpoint-77500 → checkpoint-79500}/README.md RENAMED
File without changes
{checkpoint-77500 → checkpoint-79500}/adapter_config.json RENAMED
File without changes
checkpoint-79500/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aec4531d523e5f1c6d00064a04d505d2c8cc46f2254d5f97c50a6d3f19d57e7
3
+ size 500897101
{checkpoint-77500 → checkpoint-79500}/adapter_model/README.md RENAMED
File without changes
{checkpoint-77500 → checkpoint-79500}/adapter_model/adapter_config.json RENAMED
File without changes
checkpoint-79500/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aec4531d523e5f1c6d00064a04d505d2c8cc46f2254d5f97c50a6d3f19d57e7
3
+ size 500897101
{checkpoint-76500 → checkpoint-79500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbbb89893d47261fd01a9f3778a4172d980e5d5d899645ffb5c27307d67df6b6
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d9344a9200b364b04ea1c7047cb7f771a81a931263b0a8299e74a1dbc21123
3
  size 1001724605
{checkpoint-78000 → checkpoint-79500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cae952b189b908268d157bce31712cd2487d9ad50a53ef0ae319c8f965d6c13c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d629d451a682f1240d2ed68e0aa31eff4552d6f44511ee41e17ea77a083291
3
  size 14575
{checkpoint-76500 → checkpoint-79500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:220829e44432d3fbc0f85c119c5401ba1c2989f858b8188acbaba28a1379d42e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85630629004d37069a695741c2bbf4ff767e7519f39623048a26a45a1e93ca39
3
  size 627
{checkpoint-77000 → checkpoint-79500}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.47582224011421204,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-77000",
4
- "epoch": 2.870884754483427,
5
  "eval_steps": 500,
6
- "global_step": 77000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4523,13 +4523,83 @@
4523
  "eval_samples_per_second": 0.419,
4524
  "eval_steps_per_second": 0.419,
4525
  "step": 77000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4526
  }
4527
  ],
4528
  "logging_steps": 500,
4529
  "max_steps": 80463,
4530
  "num_train_epochs": 3,
4531
  "save_steps": 500,
4532
- "total_flos": 2.1626487594830807e+19,
4533
  "trial_name": null,
4534
  "trial_params": null
4535
  }
 
1
  {
2
+ "best_metric": 0.47572794556617737,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-78500",
4
+ "epoch": 2.964095298460162,
5
  "eval_steps": 500,
6
+ "global_step": 79500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4523
  "eval_samples_per_second": 0.419,
4524
  "eval_steps_per_second": 0.419,
4525
  "step": 77000
4526
+ },
4527
+ {
4528
+ "epoch": 2.89,
4529
+ "learning_rate": 6.685963879659362e-07,
4530
+ "loss": 0.3675,
4531
+ "step": 77500
4532
+ },
4533
+ {
4534
+ "epoch": 2.89,
4535
+ "eval_loss": 0.4758478105068207,
4536
+ "eval_runtime": 1311.0096,
4537
+ "eval_samples_per_second": 0.413,
4538
+ "eval_steps_per_second": 0.413,
4539
+ "step": 77500
4540
+ },
4541
+ {
4542
+ "epoch": 2.91,
4543
+ "learning_rate": 4.6214609844061894e-07,
4544
+ "loss": 0.3696,
4545
+ "step": 78000
4546
+ },
4547
+ {
4548
+ "epoch": 2.91,
4549
+ "eval_loss": 0.4757947325706482,
4550
+ "eval_runtime": 1268.9631,
4551
+ "eval_samples_per_second": 0.427,
4552
+ "eval_steps_per_second": 0.427,
4553
+ "step": 78000
4554
+ },
4555
+ {
4556
+ "epoch": 2.93,
4557
+ "learning_rate": 2.9363864105907967e-07,
4558
+ "loss": 0.3633,
4559
+ "step": 78500
4560
+ },
4561
+ {
4562
+ "epoch": 2.93,
4563
+ "eval_loss": 0.47572794556617737,
4564
+ "eval_runtime": 1284.2805,
4565
+ "eval_samples_per_second": 0.422,
4566
+ "eval_steps_per_second": 0.422,
4567
+ "step": 78500
4568
+ },
4569
+ {
4570
+ "epoch": 2.95,
4571
+ "learning_rate": 1.6313824917496555e-07,
4572
+ "loss": 0.3712,
4573
+ "step": 79000
4574
+ },
4575
+ {
4576
+ "epoch": 2.95,
4577
+ "eval_loss": 0.47579219937324524,
4578
+ "eval_runtime": 1333.9827,
4579
+ "eval_samples_per_second": 0.406,
4580
+ "eval_steps_per_second": 0.406,
4581
+ "step": 79000
4582
+ },
4583
+ {
4584
+ "epoch": 2.96,
4585
+ "learning_rate": 7.069466822952065e-08,
4586
+ "loss": 0.37,
4587
+ "step": 79500
4588
+ },
4589
+ {
4590
+ "epoch": 2.96,
4591
+ "eval_loss": 0.47579482197761536,
4592
+ "eval_runtime": 1343.7136,
4593
+ "eval_samples_per_second": 0.403,
4594
+ "eval_steps_per_second": 0.403,
4595
+ "step": 79500
4596
  }
4597
  ],
4598
  "logging_steps": 500,
4599
  "max_steps": 80463,
4600
  "num_train_epochs": 3,
4601
  "save_steps": 500,
4602
+ "total_flos": 2.2334186767367946e+19,
4603
  "trial_name": null,
4604
  "trial_params": null
4605
  }
{checkpoint-77500 → checkpoint-79500}/training_args.bin RENAMED
File without changes
{checkpoint-78000 → checkpoint-80000}/README.md RENAMED
File without changes
{checkpoint-78000 → checkpoint-80000}/adapter_config.json RENAMED
File without changes
checkpoint-80000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb4d05e080adb0e5195e39d104132e2826c45ae5f84e265fc57e9babe3f31e2e
3
+ size 500897101
{checkpoint-78000 → checkpoint-80000}/adapter_model/README.md RENAMED
File without changes
{checkpoint-78000 → checkpoint-80000}/adapter_model/adapter_config.json RENAMED
File without changes
checkpoint-80000/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb4d05e080adb0e5195e39d104132e2826c45ae5f84e265fc57e9babe3f31e2e
3
+ size 500897101
{checkpoint-78000 → checkpoint-80000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad0ade3b6ce4741aa4976bb9b0aae8b16a0605bea6020968a870961a9dc6cf7f
3
  size 1001724605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d872aa8649276d07a16445e8ad1f9857b024aeb2bc7ef434f10697d68093bd70
3
  size 1001724605
{checkpoint-76500 → checkpoint-80000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77cc93baddf8ab32d29af2c1c219f38a87a8af2868d723737c13df696ee1f2ad
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88aed6b668511a80acef03aeb5954058975243c279e1a2f0605b6635eddb4d13
3
  size 14575
{checkpoint-78000 → checkpoint-80000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:495a1512040b6a087956ab3f68fd532032c899c1dae932d93c4b1ba403b50d1d
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c85d472815ff7897a1300024f3cab1032a73a2e8ce83cd10c397998eec29c6
3
  size 627
{checkpoint-76500 → checkpoint-80000}/trainer_state.json RENAMED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4760077893733978,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-76500",
4
- "epoch": 2.8522426456880803,
5
  "eval_steps": 500,
6
- "global_step": 76500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4509,13 +4509,111 @@
4509
  "eval_samples_per_second": 0.419,
4510
  "eval_steps_per_second": 0.419,
4511
  "step": 76500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4512
  }
4513
  ],
4514
  "logging_steps": 500,
4515
  "max_steps": 80463,
4516
  "num_train_epochs": 3,
4517
  "save_steps": 500,
4518
- "total_flos": 2.14853629526682e+19,
4519
  "trial_name": null,
4520
  "trial_params": null
4521
  }
 
1
  {
2
+ "best_metric": 0.47572794556617737,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-78500",
4
+ "epoch": 2.9827374072555086,
5
  "eval_steps": 500,
6
+ "global_step": 80000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4509
  "eval_samples_per_second": 0.419,
4510
  "eval_steps_per_second": 0.419,
4511
  "step": 76500
4512
+ },
4513
+ {
4514
+ "epoch": 2.87,
4515
+ "learning_rate": 9.129108128541176e-07,
4516
+ "loss": 0.3658,
4517
+ "step": 77000
4518
+ },
4519
+ {
4520
+ "epoch": 2.87,
4521
+ "eval_loss": 0.47582224011421204,
4522
+ "eval_runtime": 1293.6591,
4523
+ "eval_samples_per_second": 0.419,
4524
+ "eval_steps_per_second": 0.419,
4525
+ "step": 77000
4526
+ },
4527
+ {
4528
+ "epoch": 2.89,
4529
+ "learning_rate": 6.685963879659362e-07,
4530
+ "loss": 0.3675,
4531
+ "step": 77500
4532
+ },
4533
+ {
4534
+ "epoch": 2.89,
4535
+ "eval_loss": 0.4758478105068207,
4536
+ "eval_runtime": 1311.0096,
4537
+ "eval_samples_per_second": 0.413,
4538
+ "eval_steps_per_second": 0.413,
4539
+ "step": 77500
4540
+ },
4541
+ {
4542
+ "epoch": 2.91,
4543
+ "learning_rate": 4.6214609844061894e-07,
4544
+ "loss": 0.3696,
4545
+ "step": 78000
4546
+ },
4547
+ {
4548
+ "epoch": 2.91,
4549
+ "eval_loss": 0.4757947325706482,
4550
+ "eval_runtime": 1268.9631,
4551
+ "eval_samples_per_second": 0.427,
4552
+ "eval_steps_per_second": 0.427,
4553
+ "step": 78000
4554
+ },
4555
+ {
4556
+ "epoch": 2.93,
4557
+ "learning_rate": 2.9363864105907967e-07,
4558
+ "loss": 0.3633,
4559
+ "step": 78500
4560
+ },
4561
+ {
4562
+ "epoch": 2.93,
4563
+ "eval_loss": 0.47572794556617737,
4564
+ "eval_runtime": 1284.2805,
4565
+ "eval_samples_per_second": 0.422,
4566
+ "eval_steps_per_second": 0.422,
4567
+ "step": 78500
4568
+ },
4569
+ {
4570
+ "epoch": 2.95,
4571
+ "learning_rate": 1.6313824917496555e-07,
4572
+ "loss": 0.3712,
4573
+ "step": 79000
4574
+ },
4575
+ {
4576
+ "epoch": 2.95,
4577
+ "eval_loss": 0.47579219937324524,
4578
+ "eval_runtime": 1333.9827,
4579
+ "eval_samples_per_second": 0.406,
4580
+ "eval_steps_per_second": 0.406,
4581
+ "step": 79000
4582
+ },
4583
+ {
4584
+ "epoch": 2.96,
4585
+ "learning_rate": 7.069466822952065e-08,
4586
+ "loss": 0.37,
4587
+ "step": 79500
4588
+ },
4589
+ {
4590
+ "epoch": 2.96,
4591
+ "eval_loss": 0.47579482197761536,
4592
+ "eval_runtime": 1343.7136,
4593
+ "eval_samples_per_second": 0.403,
4594
+ "eval_steps_per_second": 0.403,
4595
+ "step": 79500
4596
+ },
4597
+ {
4598
+ "epoch": 2.98,
4599
+ "learning_rate": 1.6343136789165324e-08,
4600
+ "loss": 0.3647,
4601
+ "step": 80000
4602
+ },
4603
+ {
4604
+ "epoch": 2.98,
4605
+ "eval_loss": 0.475759357213974,
4606
+ "eval_runtime": 1306.8248,
4607
+ "eval_samples_per_second": 0.415,
4608
+ "eval_steps_per_second": 0.415,
4609
+ "step": 80000
4610
  }
4611
  ],
4612
  "logging_steps": 500,
4613
  "max_steps": 80463,
4614
  "num_train_epochs": 3,
4615
  "save_steps": 500,
4616
+ "total_flos": 2.2475568675952804e+19,
4617
  "trial_name": null,
4618
  "trial_params": null
4619
  }
{checkpoint-78000 → checkpoint-80000}/training_args.bin RENAMED
File without changes