ZeroUniqueness committed on
Commit
c7bce51
·
1 Parent(s): bf6bfbe

Training in progress, step 3200

Browse files
Files changed (25) hide show
  1. adapter_model.bin +1 -1
  2. {checkpoint-2800 → checkpoint-3100/adapter_model}/README.md +0 -0
  3. {checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_config.json +4 -4
  4. {checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_model.bin +1 -1
  5. checkpoint-3200/README.md +20 -0
  6. checkpoint-3200/adapter_config.json +26 -0
  7. checkpoint-3200/adapter_model.bin +3 -0
  8. {checkpoint-2800 → checkpoint-3200}/optimizer.pt +1 -1
  9. {checkpoint-2800 → checkpoint-3200}/rng_state_0.pth +1 -1
  10. {checkpoint-2800 → checkpoint-3200}/rng_state_1.pth +1 -1
  11. {checkpoint-2800 → checkpoint-3200}/rng_state_10.pth +1 -1
  12. {checkpoint-2800 → checkpoint-3200}/rng_state_11.pth +1 -1
  13. {checkpoint-2800 → checkpoint-3200}/rng_state_12.pth +1 -1
  14. {checkpoint-2800 → checkpoint-3200}/rng_state_13.pth +1 -1
  15. {checkpoint-2800 → checkpoint-3200}/rng_state_2.pth +1 -1
  16. {checkpoint-2800 → checkpoint-3200}/rng_state_3.pth +1 -1
  17. {checkpoint-2800 → checkpoint-3200}/rng_state_4.pth +1 -1
  18. {checkpoint-2800 → checkpoint-3200}/rng_state_5.pth +1 -1
  19. {checkpoint-2800 → checkpoint-3200}/rng_state_6.pth +1 -1
  20. {checkpoint-2800 → checkpoint-3200}/rng_state_7.pth +1 -1
  21. {checkpoint-2800 → checkpoint-3200}/rng_state_8.pth +1 -1
  22. {checkpoint-2800 → checkpoint-3200}/rng_state_9.pth +1 -1
  23. {checkpoint-2800 → checkpoint-3200}/scheduler.pt +1 -1
  24. {checkpoint-2800 → checkpoint-3200}/trainer_state.json +107 -3
  25. {checkpoint-2800 → checkpoint-3200}/training_args.bin +1 -1
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:604d08ed43fee049ea3799c6450a4a1d5f8cc6e58ddf33f377215e77f03ec769
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db5a92be0cd5f8b38b328e0f82e62452a3fa7b5052a0a1f93fd8c4b1dd18b7a7
3
  size 500897101
{checkpoint-2800 → checkpoint-3100/adapter_model}/README.md RENAMED
File without changes
{checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
17
  "v_proj",
18
  "o_proj",
19
- "down_proj",
20
  "q_proj",
21
- "gate_proj",
22
- "up_proj",
23
- "k_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
+ "down_proj",
18
+ "k_proj",
19
+ "gate_proj",
20
  "v_proj",
21
  "o_proj",
 
22
  "q_proj",
23
+ "up_proj"
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ea36cf8e55409a14da3fc648b1dfbd6758083b72318e4742d7d9efc2963c331
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:604d08ed43fee049ea3799c6450a4a1d5f8cc6e58ddf33f377215e77f03ec769
3
  size 500897101
checkpoint-3200/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - load_in_8bit: False
9
+ - load_in_4bit: True
10
+ - llm_int8_threshold: 6.0
11
+ - llm_int8_skip_modules: None
12
+ - llm_int8_enable_fp32_cpu_offload: False
13
+ - llm_int8_has_fp16_weight: False
14
+ - bnb_4bit_quant_type: nf4
15
+ - bnb_4bit_use_double_quant: True
16
+ - bnb_4bit_compute_dtype: bfloat16
17
+ ### Framework versions
18
+
19
+
20
+ - PEFT 0.5.0.dev0
checkpoint-3200/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
4
+ "bias": "none",
5
+ "fan_in_fan_out": null,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 32,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "down_proj",
18
+ "k_proj",
19
+ "gate_proj",
20
+ "v_proj",
21
+ "o_proj",
22
+ "q_proj",
23
+ "up_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
checkpoint-3200/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db5a92be0cd5f8b38b328e0f82e62452a3fa7b5052a0a1f93fd8c4b1dd18b7a7
3
+ size 500897101
{checkpoint-2800 → checkpoint-3200}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ad547cbbf1779bc43d5f541ea80ff23dfadca360f5b7fbe6b9210bdabd5b0a3
3
  size 1001752701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734792a711547c325f20f564fba3fc1dafb5a5e2eb00a8e88a628060b50ad4e0
3
  size 1001752701
{checkpoint-2800 → checkpoint-3200}/rng_state_0.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54ceaf70d5ffe3e642b177d8e34d1f05742963af64a1bcc361535a9360b397c4
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47493f691983492eb548e89c4bc00a908f9336311a3cdb9b6d4d0436862a7afe
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_1.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9607a8c2ef03708a5690aea65c1585d4ddbd8e01d7e279a5781ba30e8f1917ff
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd2d4258cdbe26a4276e6adf71ed40f5fb449eeb7efadbcb60ee6c3eb243b4a
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_10.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8681f4b5196d1fde8691794cb783ea1dab72c5077dbcf5394c6998bd4136e731
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1a66a04d36cd4ced3163ee7de7c3f532444837ca748b2c7402468c7cf9ac0cc
3
  size 27789
{checkpoint-2800 → checkpoint-3200}/rng_state_11.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd6cb0b4fef9e193eda245bff5fbcc8df9769fc61966e5f1cdf256150779c1da
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd816f782b7158df45bbb3a0847b7e567cbc8246b73c484a51cea72f837a615
3
  size 27789
{checkpoint-2800 → checkpoint-3200}/rng_state_12.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b66ef66b2b0e1dce0083092ba63e49bdf210999432342b9c2a3993ca381ca353
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5c0e7d21944297ce2889bf41e83eaf69e29b09934c6e5396b81f21c8d554008
3
  size 27789
{checkpoint-2800 → checkpoint-3200}/rng_state_13.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cec3ba0fdf94eca1cdf6294a4f5fddb5e3c4f431222a020f6b0acac68bcd0b2
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a856ae9cc1a21a8770367e26ba40b6eb42e788a7f1046e05e030a2571733a3
3
  size 27789
{checkpoint-2800 → checkpoint-3200}/rng_state_2.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c53913af6dbe6026773029476e95c35bab12c198a7f90d679010f0fd21198df
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21fe838aa7244c39844b0cc17fda41f0bf30a92e8c365cbf60e9909dc9539b7a
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_3.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811ad146e12ad4449d58b59f6fcb92f298ffd443e1a43261d70a55e3eb6343d9
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c73d64b42d450cb0cd9d780bb5d76b4532c040febbd3e57c1a7e507f307c6919
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_4.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5da3f408ca3cd0e89a66524e7f4846e322256ba24237784526bdc51b9ede18b
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd75d438f8c213a76df48ea475d4e8d725ef60a192611b3a3b5156b54d9bd3aa
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_5.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c659f82e6a038ab94307ae424a29fedf3767fe4eabecd42c8ea9b1fb19e36fc
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a92b4282b70600e8661d984a46aae33c95db7e5a45a98aa75d1ab7fe809f0e87
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_6.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d5c97078c0cd3e6af8628033620fd2ade2aea530684443c37b10455b6826ae1
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60a6277ffeade78c02a3c80e695be2609d9c03891fa74c239da9236a5d8a411f
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_7.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dd4ff6e318b6e48ea5386ceaf02104c9a56c201311646c4a4e1f4c3be379fe7
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ceb85b25ddf1ff875502c31946bedfdea45ff9b753957a01149a9fc85fe4aa
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_8.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31d89de68577a860ceffdb664eb8eb7a44e5a8dd6aa62c351c0caf7b931e6a02
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7647a4e90848b95f8b12db8456e13797e34b5cb91f94f229dfe860ef88396c11
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/rng_state_9.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b3e7281559d95415194847aba99c3c754c2f0ad8f10ac4eb274f044a763cf1
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4297218a6c8fddf475d7093a33dde8e1024bb348b2eacb353ad76d4cb02c652
3
  size 27772
{checkpoint-2800 → checkpoint-3200}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0008ae7bce85f92437f0e149969fba2477d40c35a55a3a590f361670b6eb7290
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c684e0d6e24a062502c981434add298c33b176147f87f977608afcf00cc005c3
3
  size 627
{checkpoint-2800 → checkpoint-3200}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0856921287320667,
5
- "global_step": 2800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -670,11 +670,115 @@
670
  "learning_rate": 0.0001422730849785107,
671
  "loss": 0.8091,
672
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
  }
674
  ],
675
  "max_steps": 7737,
676
  "num_train_epochs": 3,
677
- "total_flos": 1.205858669606155e+19,
678
  "trial_name": null,
679
  "trial_params": null
680
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.240791004265219,
5
+ "global_step": 3200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
670
  "learning_rate": 0.0001422730849785107,
671
  "loss": 0.8091,
672
  "step": 2800
673
+ },
674
+ {
675
+ "epoch": 1.1,
676
+ "learning_rate": 0.0001413497689423539,
677
+ "loss": 0.8067,
678
+ "step": 2825
679
+ },
680
+ {
681
+ "epoch": 1.11,
682
+ "learning_rate": 0.00014042218094512755,
683
+ "loss": 0.8046,
684
+ "step": 2850
685
+ },
686
+ {
687
+ "epoch": 1.11,
688
+ "learning_rate": 0.00013949041681855985,
689
+ "loss": 0.8053,
690
+ "step": 2875
691
+ },
692
+ {
693
+ "epoch": 1.12,
694
+ "learning_rate": 0.0001385545728258264,
695
+ "loss": 0.8075,
696
+ "step": 2900
697
+ },
698
+ {
699
+ "epoch": 1.13,
700
+ "learning_rate": 0.0001376147456516055,
701
+ "loss": 0.8015,
702
+ "step": 2925
703
+ },
704
+ {
705
+ "epoch": 1.14,
706
+ "learning_rate": 0.00013667103239208903,
707
+ "loss": 0.8016,
708
+ "step": 2950
709
+ },
710
+ {
711
+ "epoch": 1.15,
712
+ "learning_rate": 0.00013572353054495126,
713
+ "loss": 0.8029,
714
+ "step": 2975
715
+ },
716
+ {
717
+ "epoch": 1.16,
718
+ "learning_rate": 0.0001347723379992762,
719
+ "loss": 0.8017,
720
+ "step": 3000
721
+ },
722
+ {
723
+ "epoch": 1.16,
724
+ "eval_loss": 0.8229297995567322,
725
+ "eval_runtime": 59.3398,
726
+ "eval_samples_per_second": 12.302,
727
+ "eval_steps_per_second": 0.893,
728
+ "step": 3000
729
+ },
730
+ {
731
+ "epoch": 1.17,
732
+ "learning_rate": 0.0001338175530254443,
733
+ "loss": 0.8049,
734
+ "step": 3025
735
+ },
736
+ {
737
+ "epoch": 1.18,
738
+ "learning_rate": 0.00013285927426497985,
739
+ "loss": 0.8027,
740
+ "step": 3050
741
+ },
742
+ {
743
+ "epoch": 1.19,
744
+ "learning_rate": 0.00013189760072036008,
745
+ "loss": 0.8028,
746
+ "step": 3075
747
+ },
748
+ {
749
+ "epoch": 1.2,
750
+ "learning_rate": 0.0001309326317447869,
751
+ "loss": 0.8021,
752
+ "step": 3100
753
+ },
754
+ {
755
+ "epoch": 1.21,
756
+ "learning_rate": 0.00012996446703192257,
757
+ "loss": 0.8033,
758
+ "step": 3125
759
+ },
760
+ {
761
+ "epoch": 1.22,
762
+ "learning_rate": 0.00012899320660558986,
763
+ "loss": 0.8016,
764
+ "step": 3150
765
+ },
766
+ {
767
+ "epoch": 1.23,
768
+ "learning_rate": 0.00012801895080943846,
769
+ "loss": 0.7995,
770
+ "step": 3175
771
+ },
772
+ {
773
+ "epoch": 1.24,
774
+ "learning_rate": 0.0001270418002965782,
775
+ "loss": 0.799,
776
+ "step": 3200
777
  }
778
  ],
779
  "max_steps": 7737,
780
  "num_train_epochs": 3,
781
+ "total_flos": 1.3780535807691457e+19,
782
  "trial_name": null,
783
  "trial_params": null
784
  }
{checkpoint-2800 → checkpoint-3200}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1240323c6a8f49a8a8b1664a44594fc01574f066beb8bd9d04ff53bb19046038
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7292138fecd854f5f17371c439bbd450ee3c48e738b75818b778a55f4e26ef57
3
  size 4027