ZeroUniqueness
committed on
Commit
·
c7bce51
1
Parent(s):
bf6bfbe
Training in progress, step 3200
Browse files- adapter_model.bin +1 -1
- {checkpoint-2800 → checkpoint-3100/adapter_model}/README.md +0 -0
- {checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_config.json +4 -4
- {checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_model.bin +1 -1
- checkpoint-3200/README.md +20 -0
- checkpoint-3200/adapter_config.json +26 -0
- checkpoint-3200/adapter_model.bin +3 -0
- {checkpoint-2800 → checkpoint-3200}/optimizer.pt +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_0.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_1.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_10.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_11.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_12.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_13.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_2.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_3.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_4.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_5.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_6.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_7.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_8.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/rng_state_9.pth +1 -1
- {checkpoint-2800 → checkpoint-3200}/scheduler.pt +1 -1
- {checkpoint-2800 → checkpoint-3200}/trainer_state.json +107 -3
- {checkpoint-2800 → checkpoint-3200}/training_args.bin +1 -1
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db5a92be0cd5f8b38b328e0f82e62452a3fa7b5052a0a1f93fd8c4b1dd18b7a7
|
3 |
size 500897101
|
{checkpoint-2800 → checkpoint-3100/adapter_model}/README.md
RENAMED
File without changes
|
{checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"v_proj",
|
18 |
"o_proj",
|
19 |
-
"down_proj",
|
20 |
"q_proj",
|
21 |
-
"
|
22 |
-
"up_proj",
|
23 |
-
"k_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"down_proj",
|
18 |
+
"k_proj",
|
19 |
+
"gate_proj",
|
20 |
"v_proj",
|
21 |
"o_proj",
|
|
|
22 |
"q_proj",
|
23 |
+
"up_proj"
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-2800 → checkpoint-3100/adapter_model}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:604d08ed43fee049ea3799c6450a4a1d5f8cc6e58ddf33f377215e77f03ec769
|
3 |
size 500897101
|
checkpoint-3200/README.md
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
---
|
4 |
+
## Training procedure
|
5 |
+
|
6 |
+
|
7 |
+
The following `bitsandbytes` quantization config was used during training:
|
8 |
+
- load_in_8bit: False
|
9 |
+
- load_in_4bit: True
|
10 |
+
- llm_int8_threshold: 6.0
|
11 |
+
- llm_int8_skip_modules: None
|
12 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
13 |
+
- llm_int8_has_fp16_weight: False
|
14 |
+
- bnb_4bit_quant_type: nf4
|
15 |
+
- bnb_4bit_use_double_quant: True
|
16 |
+
- bnb_4bit_compute_dtype: bfloat16
|
17 |
+
### Framework versions
|
18 |
+
|
19 |
+
|
20 |
+
- PEFT 0.5.0.dev0
|
checkpoint-3200/adapter_config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"auto_mapping": null,
|
3 |
+
"base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
|
4 |
+
"bias": "none",
|
5 |
+
"fan_in_fan_out": null,
|
6 |
+
"inference_mode": true,
|
7 |
+
"init_lora_weights": true,
|
8 |
+
"layers_pattern": null,
|
9 |
+
"layers_to_transform": null,
|
10 |
+
"lora_alpha": 16,
|
11 |
+
"lora_dropout": 0.05,
|
12 |
+
"modules_to_save": null,
|
13 |
+
"peft_type": "LORA",
|
14 |
+
"r": 32,
|
15 |
+
"revision": null,
|
16 |
+
"target_modules": [
|
17 |
+
"down_proj",
|
18 |
+
"k_proj",
|
19 |
+
"gate_proj",
|
20 |
+
"v_proj",
|
21 |
+
"o_proj",
|
22 |
+
"q_proj",
|
23 |
+
"up_proj"
|
24 |
+
],
|
25 |
+
"task_type": "CAUSAL_LM"
|
26 |
+
}
|
checkpoint-3200/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db5a92be0cd5f8b38b328e0f82e62452a3fa7b5052a0a1f93fd8c4b1dd18b7a7
|
3 |
+
size 500897101
|
{checkpoint-2800 → checkpoint-3200}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001752701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:734792a711547c325f20f564fba3fc1dafb5a5e2eb00a8e88a628060b50ad4e0
|
3 |
size 1001752701
|
{checkpoint-2800 → checkpoint-3200}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47493f691983492eb548e89c4bc00a908f9336311a3cdb9b6d4d0436862a7afe
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fd2d4258cdbe26a4276e6adf71ed40f5fb449eeb7efadbcb60ee6c3eb243b4a
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_10.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1a66a04d36cd4ced3163ee7de7c3f532444837ca748b2c7402468c7cf9ac0cc
|
3 |
size 27789
|
{checkpoint-2800 → checkpoint-3200}/rng_state_11.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fd816f782b7158df45bbb3a0847b7e567cbc8246b73c484a51cea72f837a615
|
3 |
size 27789
|
{checkpoint-2800 → checkpoint-3200}/rng_state_12.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5c0e7d21944297ce2889bf41e83eaf69e29b09934c6e5396b81f21c8d554008
|
3 |
size 27789
|
{checkpoint-2800 → checkpoint-3200}/rng_state_13.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4a856ae9cc1a21a8770367e26ba40b6eb42e788a7f1046e05e030a2571733a3
|
3 |
size 27789
|
{checkpoint-2800 → checkpoint-3200}/rng_state_2.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21fe838aa7244c39844b0cc17fda41f0bf30a92e8c365cbf60e9909dc9539b7a
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_3.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c73d64b42d450cb0cd9d780bb5d76b4532c040febbd3e57c1a7e507f307c6919
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_4.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd75d438f8c213a76df48ea475d4e8d725ef60a192611b3a3b5156b54d9bd3aa
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_5.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a92b4282b70600e8661d984a46aae33c95db7e5a45a98aa75d1ab7fe809f0e87
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_6.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60a6277ffeade78c02a3c80e695be2609d9c03891fa74c239da9236a5d8a411f
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_7.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04ceb85b25ddf1ff875502c31946bedfdea45ff9b753957a01149a9fc85fe4aa
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_8.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7647a4e90848b95f8b12db8456e13797e34b5cb91f94f229dfe860ef88396c11
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/rng_state_9.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4297218a6c8fddf475d7093a33dde8e1024bb348b2eacb353ad76d4cb02c652
|
3 |
size 27772
|
{checkpoint-2800 → checkpoint-3200}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c684e0d6e24a062502c981434add298c33b176147f87f977608afcf00cc005c3
|
3 |
size 627
|
{checkpoint-2800 → checkpoint-3200}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -670,11 +670,115 @@
|
|
670 |
"learning_rate": 0.0001422730849785107,
|
671 |
"loss": 0.8091,
|
672 |
"step": 2800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
673 |
}
|
674 |
],
|
675 |
"max_steps": 7737,
|
676 |
"num_train_epochs": 3,
|
677 |
-
"total_flos": 1.
|
678 |
"trial_name": null,
|
679 |
"trial_params": null
|
680 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.240791004265219,
|
5 |
+
"global_step": 3200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
670 |
"learning_rate": 0.0001422730849785107,
|
671 |
"loss": 0.8091,
|
672 |
"step": 2800
|
673 |
+
},
|
674 |
+
{
|
675 |
+
"epoch": 1.1,
|
676 |
+
"learning_rate": 0.0001413497689423539,
|
677 |
+
"loss": 0.8067,
|
678 |
+
"step": 2825
|
679 |
+
},
|
680 |
+
{
|
681 |
+
"epoch": 1.11,
|
682 |
+
"learning_rate": 0.00014042218094512755,
|
683 |
+
"loss": 0.8046,
|
684 |
+
"step": 2850
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"epoch": 1.11,
|
688 |
+
"learning_rate": 0.00013949041681855985,
|
689 |
+
"loss": 0.8053,
|
690 |
+
"step": 2875
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 1.12,
|
694 |
+
"learning_rate": 0.0001385545728258264,
|
695 |
+
"loss": 0.8075,
|
696 |
+
"step": 2900
|
697 |
+
},
|
698 |
+
{
|
699 |
+
"epoch": 1.13,
|
700 |
+
"learning_rate": 0.0001376147456516055,
|
701 |
+
"loss": 0.8015,
|
702 |
+
"step": 2925
|
703 |
+
},
|
704 |
+
{
|
705 |
+
"epoch": 1.14,
|
706 |
+
"learning_rate": 0.00013667103239208903,
|
707 |
+
"loss": 0.8016,
|
708 |
+
"step": 2950
|
709 |
+
},
|
710 |
+
{
|
711 |
+
"epoch": 1.15,
|
712 |
+
"learning_rate": 0.00013572353054495126,
|
713 |
+
"loss": 0.8029,
|
714 |
+
"step": 2975
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"epoch": 1.16,
|
718 |
+
"learning_rate": 0.0001347723379992762,
|
719 |
+
"loss": 0.8017,
|
720 |
+
"step": 3000
|
721 |
+
},
|
722 |
+
{
|
723 |
+
"epoch": 1.16,
|
724 |
+
"eval_loss": 0.8229297995567322,
|
725 |
+
"eval_runtime": 59.3398,
|
726 |
+
"eval_samples_per_second": 12.302,
|
727 |
+
"eval_steps_per_second": 0.893,
|
728 |
+
"step": 3000
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 1.17,
|
732 |
+
"learning_rate": 0.0001338175530254443,
|
733 |
+
"loss": 0.8049,
|
734 |
+
"step": 3025
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 1.18,
|
738 |
+
"learning_rate": 0.00013285927426497985,
|
739 |
+
"loss": 0.8027,
|
740 |
+
"step": 3050
|
741 |
+
},
|
742 |
+
{
|
743 |
+
"epoch": 1.19,
|
744 |
+
"learning_rate": 0.00013189760072036008,
|
745 |
+
"loss": 0.8028,
|
746 |
+
"step": 3075
|
747 |
+
},
|
748 |
+
{
|
749 |
+
"epoch": 1.2,
|
750 |
+
"learning_rate": 0.0001309326317447869,
|
751 |
+
"loss": 0.8021,
|
752 |
+
"step": 3100
|
753 |
+
},
|
754 |
+
{
|
755 |
+
"epoch": 1.21,
|
756 |
+
"learning_rate": 0.00012996446703192257,
|
757 |
+
"loss": 0.8033,
|
758 |
+
"step": 3125
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 1.22,
|
762 |
+
"learning_rate": 0.00012899320660558986,
|
763 |
+
"loss": 0.8016,
|
764 |
+
"step": 3150
|
765 |
+
},
|
766 |
+
{
|
767 |
+
"epoch": 1.23,
|
768 |
+
"learning_rate": 0.00012801895080943846,
|
769 |
+
"loss": 0.7995,
|
770 |
+
"step": 3175
|
771 |
+
},
|
772 |
+
{
|
773 |
+
"epoch": 1.24,
|
774 |
+
"learning_rate": 0.0001270418002965782,
|
775 |
+
"loss": 0.799,
|
776 |
+
"step": 3200
|
777 |
}
|
778 |
],
|
779 |
"max_steps": 7737,
|
780 |
"num_train_epochs": 3,
|
781 |
+
"total_flos": 1.3780535807691457e+19,
|
782 |
"trial_name": null,
|
783 |
"trial_params": null
|
784 |
}
|
{checkpoint-2800 → checkpoint-3200}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7292138fecd854f5f17371c439bbd450ee3c48e738b75818b778a55f4e26ef57
|
3 |
size 4027
|