diff --git a/adapter_config.json b/adapter_config.json
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644
--- a/adapter_config.json
+++ b/adapter_config.json
@@ -14,12 +14,12 @@
"r": 32,
"revision": null,
"target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
+ "gate_proj",
"v_proj",
+ "down_proj",
"k_proj",
- "gate_proj",
+ "q_proj",
+ "up_proj",
"o_proj"
],
"task_type": "CAUSAL_LM"
diff --git a/adapter_model.bin b/adapter_model.bin
index ac3b1522b1c7533a73f85bbe0bb6c7ee8f6b2132..65fc9cf1ea0ef35269a18503d87f4db78d9e319e 100644
--- a/adapter_model.bin
+++ b/adapter_model.bin
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:f56b8a333605f03b496496aac3531e5eb50e390d67be06083619275a78de77da
+oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
size 500897101
diff --git a/checkpoint-56000/adapter_model.bin b/checkpoint-56000/adapter_model.bin
deleted file mode 100644
index 5660869c0b783f1993700a8f87cfc7179b9a6cdf..0000000000000000000000000000000000000000
--- a/checkpoint-56000/adapter_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2d61b2ab661f17f1b28e07a6ea4c559efd2487b69440f512fbda902147b2007f
-size 500897101
diff --git a/checkpoint-56000/adapter_model/adapter_model.bin b/checkpoint-56000/adapter_model/adapter_model.bin
deleted file mode 100644
index 5660869c0b783f1993700a8f87cfc7179b9a6cdf..0000000000000000000000000000000000000000
--- a/checkpoint-56000/adapter_model/adapter_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2d61b2ab661f17f1b28e07a6ea4c559efd2487b69440f512fbda902147b2007f
-size 500897101
diff --git a/checkpoint-56000/optimizer.pt b/checkpoint-56000/optimizer.pt
deleted file mode 100644
index 7f861d2cbf487eb06ec9f15c270e9d165caa125c..0000000000000000000000000000000000000000
--- a/checkpoint-56000/optimizer.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e0d0299416431a6687f29eb725bd8536e5bc5512ff27981755266d125bd960dc
-size 1001723453
diff --git a/checkpoint-56000/rng_state.pth b/checkpoint-56000/rng_state.pth
deleted file mode 100644
index c5889daf2f1eef23476d55d0aa0b6145f68cf00f..0000000000000000000000000000000000000000
--- a/checkpoint-56000/rng_state.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d223168e1cf1a8cbe3b356c3a0cf2b7c1b147eab84d53ec37dea66d1618867f6
-size 14575
diff --git a/checkpoint-56000/scheduler.pt b/checkpoint-56000/scheduler.pt
deleted file mode 100644
index 1707969652b814b0689ecc48e49487d339c43f91..0000000000000000000000000000000000000000
--- a/checkpoint-56000/scheduler.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:40f07a3bf2b8b7e85bd7ec32b459bd8eba34e3ffd70129884ee8cac79708a84f
-size 627
diff --git a/checkpoint-56000/training_args.bin b/checkpoint-56000/training_args.bin
deleted file mode 100644
index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000
--- a/checkpoint-56000/training_args.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70
-size 4027
diff --git a/checkpoint-57000/adapter_model.bin b/checkpoint-57000/adapter_model.bin
deleted file mode 100644
index 9e7902152e7806e737b702ec14dcc5c30080cdda..0000000000000000000000000000000000000000
--- a/checkpoint-57000/adapter_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9463fbc37a2c37f850b2aa713212bd675cce373b2a226f9fecf647f60157d1a1
-size 500897101
diff --git a/checkpoint-57000/adapter_model/adapter_model.bin b/checkpoint-57000/adapter_model/adapter_model.bin
deleted file mode 100644
index 9e7902152e7806e737b702ec14dcc5c30080cdda..0000000000000000000000000000000000000000
--- a/checkpoint-57000/adapter_model/adapter_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9463fbc37a2c37f850b2aa713212bd675cce373b2a226f9fecf647f60157d1a1
-size 500897101
diff --git a/checkpoint-57000/optimizer.pt b/checkpoint-57000/optimizer.pt
deleted file mode 100644
index 0d520db5dadf7329d5d4230fc36be8d0361c9ec9..0000000000000000000000000000000000000000
--- a/checkpoint-57000/optimizer.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d85e0cce4ea774ece1bba3b083129dd4ea4f075278346655fd271c9663edf7a0
-size 1001723453
diff --git a/checkpoint-57000/rng_state.pth b/checkpoint-57000/rng_state.pth
deleted file mode 100644
index b1b5850951fe801890b55c988111ecc898d31225..0000000000000000000000000000000000000000
--- a/checkpoint-57000/rng_state.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e7b0ae395ccd0b4875fa94f8cd4ee3274662f44279f744979610604a15d72da0
-size 14575
diff --git a/checkpoint-57000/scheduler.pt b/checkpoint-57000/scheduler.pt
deleted file mode 100644
index 2071308d81a1e00542811e957086cf96adb3de83..0000000000000000000000000000000000000000
--- a/checkpoint-57000/scheduler.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7c18b73ff8e0ca9bda5d92134e841aafa154377e996a5dd3b1b1a3a0b329e74e
-size 627
diff --git a/checkpoint-57000/training_args.bin b/checkpoint-57000/training_args.bin
deleted file mode 100644
index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000
--- a/checkpoint-57000/training_args.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70
-size 4027
diff --git a/checkpoint-58000/README.md b/checkpoint-58000/README.md
deleted file mode 100644
index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..0000000000000000000000000000000000000000
--- a/checkpoint-58000/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-library_name: peft
----
-## Training procedure
-
-
-The following `bitsandbytes` quantization config was used during training:
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: nf4
-- bnb_4bit_use_double_quant: True
-- bnb_4bit_compute_dtype: bfloat16
-### Framework versions
-
-
-- PEFT 0.5.0.dev0
diff --git a/checkpoint-58000/adapter_config.json b/checkpoint-58000/adapter_config.json
deleted file mode 100644
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..0000000000000000000000000000000000000000
--- a/checkpoint-58000/adapter_config.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "auto_mapping": null,
- "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
- "bias": "none",
- "fan_in_fan_out": null,
- "inference_mode": true,
- "init_lora_weights": true,
- "layers_pattern": null,
- "layers_to_transform": null,
- "lora_alpha": 16,
- "lora_dropout": 0.05,
- "modules_to_save": null,
- "peft_type": "LORA",
- "r": 32,
- "revision": null,
- "target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
- "v_proj",
- "k_proj",
- "gate_proj",
- "o_proj"
- ],
- "task_type": "CAUSAL_LM"
-}
\ No newline at end of file
diff --git a/checkpoint-58000/adapter_model.bin b/checkpoint-58000/adapter_model.bin
deleted file mode 100644
index 38268d0d52c3d2167e0d36ff92bf514b9a21f10e..0000000000000000000000000000000000000000
--- a/checkpoint-58000/adapter_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cf9efdf73d7ecc9f45ca166bec5b70555182c38338e6de139c6203b8a009fc59
-size 500897101
diff --git a/checkpoint-58000/adapter_model/README.md b/checkpoint-58000/adapter_model/README.md
deleted file mode 100644
index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..0000000000000000000000000000000000000000
--- a/checkpoint-58000/adapter_model/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-library_name: peft
----
-## Training procedure
-
-
-The following `bitsandbytes` quantization config was used during training:
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: nf4
-- bnb_4bit_use_double_quant: True
-- bnb_4bit_compute_dtype: bfloat16
-### Framework versions
-
-
-- PEFT 0.5.0.dev0
diff --git a/checkpoint-58000/adapter_model/adapter_config.json b/checkpoint-58000/adapter_model/adapter_config.json
deleted file mode 100644
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..0000000000000000000000000000000000000000
--- a/checkpoint-58000/adapter_model/adapter_config.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "auto_mapping": null,
- "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
- "bias": "none",
- "fan_in_fan_out": null,
- "inference_mode": true,
- "init_lora_weights": true,
- "layers_pattern": null,
- "layers_to_transform": null,
- "lora_alpha": 16,
- "lora_dropout": 0.05,
- "modules_to_save": null,
- "peft_type": "LORA",
- "r": 32,
- "revision": null,
- "target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
- "v_proj",
- "k_proj",
- "gate_proj",
- "o_proj"
- ],
- "task_type": "CAUSAL_LM"
-}
\ No newline at end of file
diff --git a/checkpoint-58000/adapter_model/adapter_model.bin b/checkpoint-58000/adapter_model/adapter_model.bin
deleted file mode 100644
index 38268d0d52c3d2167e0d36ff92bf514b9a21f10e..0000000000000000000000000000000000000000
--- a/checkpoint-58000/adapter_model/adapter_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cf9efdf73d7ecc9f45ca166bec5b70555182c38338e6de139c6203b8a009fc59
-size 500897101
diff --git a/checkpoint-58000/optimizer.pt b/checkpoint-58000/optimizer.pt
deleted file mode 100644
index 9bbb843998b8bf92c75b68973a7fdcbb9ce63d7f..0000000000000000000000000000000000000000
--- a/checkpoint-58000/optimizer.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:67f192e31625a5f9d71aaeb75826e3461458c994c58bc8d3d5b3b59fa56efc4b
-size 1001723453
diff --git a/checkpoint-58000/rng_state.pth b/checkpoint-58000/rng_state.pth
deleted file mode 100644
index 70494ee8fdee686594723ae0f399ca93c60a4875..0000000000000000000000000000000000000000
--- a/checkpoint-58000/rng_state.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5923ba7d43395d2ab7a25af40d67f773d9e67e462f9250548814d4e4d1853054
-size 14575
diff --git a/checkpoint-58000/scheduler.pt b/checkpoint-58000/scheduler.pt
deleted file mode 100644
index 2285fd7f23746958e4a8cb75acb768d7a2250aa7..0000000000000000000000000000000000000000
--- a/checkpoint-58000/scheduler.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:eb6d23b542a910d4d880a9ad37544effe8607b091db3f3b955d778af0357176f
-size 627
diff --git a/checkpoint-58000/training_args.bin b/checkpoint-58000/training_args.bin
deleted file mode 100644
index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000
--- a/checkpoint-58000/training_args.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70
-size 4027
diff --git a/checkpoint-59000/README.md b/checkpoint-59000/README.md
deleted file mode 100644
index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..0000000000000000000000000000000000000000
--- a/checkpoint-59000/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-library_name: peft
----
-## Training procedure
-
-
-The following `bitsandbytes` quantization config was used during training:
-- load_in_8bit: False
-- load_in_4bit: True
-- llm_int8_threshold: 6.0
-- llm_int8_skip_modules: None
-- llm_int8_enable_fp32_cpu_offload: False
-- llm_int8_has_fp16_weight: False
-- bnb_4bit_quant_type: nf4
-- bnb_4bit_use_double_quant: True
-- bnb_4bit_compute_dtype: bfloat16
-### Framework versions
-
-
-- PEFT 0.5.0.dev0
diff --git a/checkpoint-59000/adapter_config.json b/checkpoint-59000/adapter_config.json
deleted file mode 100644
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..0000000000000000000000000000000000000000
--- a/checkpoint-59000/adapter_config.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "auto_mapping": null,
- "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
- "bias": "none",
- "fan_in_fan_out": null,
- "inference_mode": true,
- "init_lora_weights": true,
- "layers_pattern": null,
- "layers_to_transform": null,
- "lora_alpha": 16,
- "lora_dropout": 0.05,
- "modules_to_save": null,
- "peft_type": "LORA",
- "r": 32,
- "revision": null,
- "target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
- "v_proj",
- "k_proj",
- "gate_proj",
- "o_proj"
- ],
- "task_type": "CAUSAL_LM"
-}
\ No newline at end of file
diff --git a/checkpoint-59000/adapter_model.bin b/checkpoint-59000/adapter_model.bin
deleted file mode 100644
index ac3b1522b1c7533a73f85bbe0bb6c7ee8f6b2132..0000000000000000000000000000000000000000
--- a/checkpoint-59000/adapter_model.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f56b8a333605f03b496496aac3531e5eb50e390d67be06083619275a78de77da
-size 500897101
diff --git a/checkpoint-59000/optimizer.pt b/checkpoint-59000/optimizer.pt
deleted file mode 100644
index aff4f19ffec18b9cc4623ef3a567549e1495e792..0000000000000000000000000000000000000000
--- a/checkpoint-59000/optimizer.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b26d7da827461914ac19ca3bc7f168368f4015c2a5364188dfe94a4e3cfde0cb
-size 1001723453
diff --git a/checkpoint-59000/rng_state.pth b/checkpoint-59000/rng_state.pth
deleted file mode 100644
index da05bf34ede76c80250c8b168b2b4a471506aa01..0000000000000000000000000000000000000000
--- a/checkpoint-59000/rng_state.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8bd334de4d3525ea70c0977c8fe7956563ce9e7d3af12dc2b9fcbbc68894cb2d
-size 14575
diff --git a/checkpoint-59000/scheduler.pt b/checkpoint-59000/scheduler.pt
deleted file mode 100644
index 51be63987e18fc71d49a0fe7cfb65e47982988f2..0000000000000000000000000000000000000000
--- a/checkpoint-59000/scheduler.pt
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:67aad82a87c2a78c7bf3dfc2188cc16487d1a53a6ab0632026c89faf1cd6731c
-size 627
diff --git a/checkpoint-59000/training_args.bin b/checkpoint-59000/training_args.bin
deleted file mode 100644
index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000
--- a/checkpoint-59000/training_args.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70
-size 4027
diff --git a/checkpoint-56000/README.md b/checkpoint-69000/README.md
similarity index 93%
rename from checkpoint-56000/README.md
rename to checkpoint-69000/README.md
index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644
--- a/checkpoint-56000/README.md
+++ b/checkpoint-69000/README.md
@@ -5,6 +5,7 @@ library_name: peft
The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
diff --git a/checkpoint-56000/adapter_model/adapter_config.json b/checkpoint-69000/adapter_config.json
similarity index 100%
rename from checkpoint-56000/adapter_model/adapter_config.json
rename to checkpoint-69000/adapter_config.json
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644
--- a/checkpoint-56000/adapter_model/adapter_config.json
+++ b/checkpoint-69000/adapter_config.json
@@ -14,12 +14,12 @@
"r": 32,
"revision": null,
"target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
+ "gate_proj",
"v_proj",
+ "down_proj",
"k_proj",
- "gate_proj",
+ "q_proj",
+ "up_proj",
"o_proj"
],
"task_type": "CAUSAL_LM"
diff --git a/checkpoint-69000/adapter_model.bin b/checkpoint-69000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..71903bbe3394aef44445334a8aeaf8a5b325b36d
--- /dev/null
+++ b/checkpoint-69000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16127581d1b65765200af747a5c98d27b237b49430e306dfd23a9c3ad6af3b9c
+size 500897101
diff --git a/checkpoint-57000/README.md b/checkpoint-69000/adapter_model/README.md
similarity index 93%
rename from checkpoint-57000/README.md
rename to checkpoint-69000/adapter_model/README.md
index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644
--- a/checkpoint-57000/README.md
+++ b/checkpoint-69000/adapter_model/README.md
@@ -5,6 +5,7 @@ library_name: peft
The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
diff --git a/checkpoint-56000/adapter_config.json b/checkpoint-69000/adapter_model/adapter_config.json
similarity index 100%
rename from checkpoint-56000/adapter_config.json
rename to checkpoint-69000/adapter_model/adapter_config.json
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644
--- a/checkpoint-56000/adapter_config.json
+++ b/checkpoint-69000/adapter_model/adapter_config.json
@@ -14,12 +14,12 @@
"r": 32,
"revision": null,
"target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
+ "gate_proj",
"v_proj",
+ "down_proj",
"k_proj",
- "gate_proj",
+ "q_proj",
+ "up_proj",
"o_proj"
],
"task_type": "CAUSAL_LM"
diff --git a/checkpoint-69000/adapter_model/adapter_model.bin b/checkpoint-69000/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..71903bbe3394aef44445334a8aeaf8a5b325b36d
--- /dev/null
+++ b/checkpoint-69000/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16127581d1b65765200af747a5c98d27b237b49430e306dfd23a9c3ad6af3b9c
+size 500897101
diff --git a/checkpoint-69000/optimizer.pt b/checkpoint-69000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50d7da297d84d8bcccbb2e41c8b73d63ed0c1b96
--- /dev/null
+++ b/checkpoint-69000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52478f59ec5c65d4db6d79009fc0c477e003ba9db2b5648781779b6963bc40cb
+size 1001724605
diff --git a/checkpoint-69000/rng_state.pth b/checkpoint-69000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8866aa3b38194844d50a80dfd7dcfead003f32da
--- /dev/null
+++ b/checkpoint-69000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7732edd0ae5999edb700e14bae64e828df5241beb83fbee05815f6c10b73570
+size 14575
diff --git a/checkpoint-69000/scheduler.pt b/checkpoint-69000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..437184049237e0e08f28edb326199e61b88e5ad7
--- /dev/null
+++ b/checkpoint-69000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0f5690258b17f07cbd583d2e586e1be27217d957aa1adadeb296ee58f808a87
+size 627
diff --git a/checkpoint-59000/trainer_state.json b/checkpoint-69000/trainer_state.json
similarity index 92%
rename from checkpoint-59000/trainer_state.json
rename to checkpoint-69000/trainer_state.json
index a574a20b178ba231bb7bbb0feab8d2272b572097..872a75702b83ffc0648cf4e59ff1bad375c34d07 100644
--- a/checkpoint-59000/trainer_state.json
+++ b/checkpoint-69000/trainer_state.json
@@ -1,8 +1,9 @@
{
- "best_metric": 0.4893116354942322,
- "best_model_checkpoint": "./qlora-out/checkpoint-59000",
- "epoch": 2.1997688378509377,
- "global_step": 59000,
+ "best_metric": 0.4789520502090454,
+ "best_model_checkpoint": "./qlora-out/checkpoint-69000",
+ "epoch": 2.5726110137578764,
+ "eval_steps": 500,
+ "global_step": 69000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
@@ -4018,11 +4019,293 @@
"eval_samples_per_second": 0.436,
"eval_steps_per_second": 0.436,
"step": 59000
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 3.167411635594364e-05,
+ "loss": 0.3867,
+ "step": 59500
+ },
+ {
+ "epoch": 2.22,
+ "eval_loss": 0.48985520005226135,
+ "eval_runtime": 1240.4608,
+ "eval_samples_per_second": 0.437,
+ "eval_steps_per_second": 0.437,
+ "step": 59500
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 3.0261604379828834e-05,
+ "loss": 0.3736,
+ "step": 60000
+ },
+ {
+ "epoch": 2.24,
+ "eval_loss": 0.489548921585083,
+ "eval_runtime": 1234.7527,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 60000
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 2.887567598106955e-05,
+ "loss": 0.361,
+ "step": 60500
+ },
+ {
+ "epoch": 2.26,
+ "eval_loss": 0.4885287582874298,
+ "eval_runtime": 1231.4045,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 60500
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 2.7516859461678857e-05,
+ "loss": 0.3778,
+ "step": 61000
+ },
+ {
+ "epoch": 2.27,
+ "eval_loss": 0.4883672893047333,
+ "eval_runtime": 1235.8497,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 61000
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 2.618567278889328e-05,
+ "loss": 0.3791,
+ "step": 61500
+ },
+ {
+ "epoch": 2.29,
+ "eval_loss": 0.4874744415283203,
+ "eval_runtime": 1231.8195,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 61500
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 2.4882623397728655e-05,
+ "loss": 0.3705,
+ "step": 62000
+ },
+ {
+ "epoch": 2.31,
+ "eval_loss": 0.486933171749115,
+ "eval_runtime": 1227.5583,
+ "eval_samples_per_second": 0.442,
+ "eval_steps_per_second": 0.442,
+ "step": 62000
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 2.3608207997551255e-05,
+ "loss": 0.3698,
+ "step": 62500
+ },
+ {
+ "epoch": 2.33,
+ "eval_loss": 0.48592954874038696,
+ "eval_runtime": 1282.2531,
+ "eval_samples_per_second": 0.423,
+ "eval_steps_per_second": 0.423,
+ "step": 62500
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 2.2362912382736857e-05,
+ "loss": 0.381,
+ "step": 63000
+ },
+ {
+ "epoch": 2.35,
+ "eval_loss": 0.4852922856807709,
+ "eval_runtime": 1229.4457,
+ "eval_samples_per_second": 0.441,
+ "eval_steps_per_second": 0.441,
+ "step": 63000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 2.1147211247491084e-05,
+ "loss": 0.3728,
+ "step": 63500
+ },
+ {
+ "epoch": 2.37,
+ "eval_loss": 0.484967440366745,
+ "eval_runtime": 1296.2845,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 63500
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 1.9961568004900565e-05,
+ "loss": 0.3695,
+ "step": 64000
+ },
+ {
+ "epoch": 2.39,
+ "eval_loss": 0.4844016432762146,
+ "eval_runtime": 1317.5418,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 64000
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 1.8806434610284497e-05,
+ "loss": 0.3682,
+ "step": 64500
+ },
+ {
+ "epoch": 2.4,
+ "eval_loss": 0.4838670790195465,
+ "eval_runtime": 1337.5922,
+ "eval_samples_per_second": 0.405,
+ "eval_steps_per_second": 0.405,
+ "step": 64500
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 1.768225138891393e-05,
+ "loss": 0.3594,
+ "step": 65000
+ },
+ {
+ "epoch": 2.42,
+ "eval_loss": 0.48305046558380127,
+ "eval_runtime": 1317.2888,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 65000
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 1.6589446868164037e-05,
+ "loss": 0.367,
+ "step": 65500
+ },
+ {
+ "epoch": 2.44,
+ "eval_loss": 0.48225167393684387,
+ "eval_runtime": 1315.9763,
+ "eval_samples_per_second": 0.412,
+ "eval_steps_per_second": 0.412,
+ "step": 65500
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 1.552843761416395e-05,
+ "loss": 0.3781,
+ "step": 66000
+ },
+ {
+ "epoch": 2.46,
+ "eval_loss": 0.48182958364486694,
+ "eval_runtime": 1298.0711,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66000
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 1.4499628073005733e-05,
+ "loss": 0.3632,
+ "step": 66500
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 0.48136985301971436,
+ "eval_runtime": 1295.6256,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66500
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 1.350341041657378e-05,
+ "loss": 0.3707,
+ "step": 67000
+ },
+ {
+ "epoch": 2.5,
+ "eval_loss": 0.48081424832344055,
+ "eval_runtime": 1297.8801,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 67000
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 1.2540164393052622e-05,
+ "loss": 0.3657,
+ "step": 67500
+ },
+ {
+ "epoch": 2.52,
+ "eval_loss": 0.48031187057495117,
+ "eval_runtime": 1299.2471,
+ "eval_samples_per_second": 0.417,
+ "eval_steps_per_second": 0.417,
+ "step": 67500
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 1.1610257182170914e-05,
+ "loss": 0.3742,
+ "step": 68000
+ },
+ {
+ "epoch": 2.54,
+ "eval_loss": 0.479922354221344,
+ "eval_runtime": 1275.2567,
+ "eval_samples_per_second": 0.425,
+ "eval_steps_per_second": 0.425,
+ "step": 68000
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 1.0714043255236094e-05,
+ "loss": 0.3761,
+ "step": 68500
+ },
+ {
+ "epoch": 2.55,
+ "eval_loss": 0.4795922338962555,
+ "eval_runtime": 1321.5276,
+ "eval_samples_per_second": 0.41,
+ "eval_steps_per_second": 0.41,
+ "step": 68500
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 9.851864240013509e-06,
+ "loss": 0.3754,
+ "step": 69000
+ },
+ {
+ "epoch": 2.57,
+ "eval_loss": 0.4789520502090454,
+ "eval_runtime": 1345.4528,
+ "eval_samples_per_second": 0.403,
+ "eval_steps_per_second": 0.403,
+ "step": 69000
}
],
+ "logging_steps": 500,
"max_steps": 80463,
"num_train_epochs": 3,
- "total_flos": 1.6542001385066742e+19,
+ "save_steps": 500,
+ "total_flos": 1.9364073941589443e+19,
"trial_name": null,
"trial_params": null
}
diff --git a/checkpoint-69000/training_args.bin b/checkpoint-69000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6
--- /dev/null
+++ b/checkpoint-69000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79
+size 4155
diff --git a/checkpoint-57000/adapter_model/README.md b/checkpoint-69500/README.md
similarity index 93%
rename from checkpoint-57000/adapter_model/README.md
rename to checkpoint-69500/README.md
index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644
--- a/checkpoint-57000/adapter_model/README.md
+++ b/checkpoint-69500/README.md
@@ -5,6 +5,7 @@ library_name: peft
The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
diff --git a/checkpoint-57000/adapter_config.json b/checkpoint-69500/adapter_config.json
similarity index 100%
rename from checkpoint-57000/adapter_config.json
rename to checkpoint-69500/adapter_config.json
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644
--- a/checkpoint-57000/adapter_config.json
+++ b/checkpoint-69500/adapter_config.json
@@ -14,12 +14,12 @@
"r": 32,
"revision": null,
"target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
+ "gate_proj",
"v_proj",
+ "down_proj",
"k_proj",
- "gate_proj",
+ "q_proj",
+ "up_proj",
"o_proj"
],
"task_type": "CAUSAL_LM"
diff --git a/checkpoint-69500/adapter_model.bin b/checkpoint-69500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8077f59997c7d8fa6a0a7c7e9292ff33a8107f9b
--- /dev/null
+++ b/checkpoint-69500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d536051f2a1ab536e6e716808efa406b8fc4bc641ebcf6102a663de9eab5ffe
+size 500897101
diff --git a/checkpoint-56000/adapter_model/README.md b/checkpoint-69500/adapter_model/README.md
similarity index 93%
rename from checkpoint-56000/adapter_model/README.md
rename to checkpoint-69500/adapter_model/README.md
index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644
--- a/checkpoint-56000/adapter_model/README.md
+++ b/checkpoint-69500/adapter_model/README.md
@@ -5,6 +5,7 @@ library_name: peft
The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
diff --git a/checkpoint-57000/adapter_model/adapter_config.json b/checkpoint-69500/adapter_model/adapter_config.json
similarity index 100%
rename from checkpoint-57000/adapter_model/adapter_config.json
rename to checkpoint-69500/adapter_model/adapter_config.json
index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644
--- a/checkpoint-57000/adapter_model/adapter_config.json
+++ b/checkpoint-69500/adapter_model/adapter_config.json
@@ -14,12 +14,12 @@
"r": 32,
"revision": null,
"target_modules": [
- "up_proj",
- "down_proj",
- "q_proj",
+ "gate_proj",
"v_proj",
+ "down_proj",
"k_proj",
- "gate_proj",
+ "q_proj",
+ "up_proj",
"o_proj"
],
"task_type": "CAUSAL_LM"
diff --git a/checkpoint-69500/adapter_model/adapter_model.bin b/checkpoint-69500/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8077f59997c7d8fa6a0a7c7e9292ff33a8107f9b
--- /dev/null
+++ b/checkpoint-69500/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d536051f2a1ab536e6e716808efa406b8fc4bc641ebcf6102a663de9eab5ffe
+size 500897101
diff --git a/checkpoint-69500/optimizer.pt b/checkpoint-69500/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0fa7a9efdc2f860685085b06d5c19abedffc11a
--- /dev/null
+++ b/checkpoint-69500/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0df421a10c3784a131b0ab37e1485ed063b6fa56024cc56104f9dbaad09ebe1
+size 1001724605
diff --git a/checkpoint-69500/rng_state.pth b/checkpoint-69500/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..87f90c42022499cbddffc755d7034ebd31d186f5
--- /dev/null
+++ b/checkpoint-69500/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9ee221e71303f97217b0d58a1364dcc9e4c1fac4ba0baf829b9e79b7ae1680b
+size 14575
diff --git a/checkpoint-69500/scheduler.pt b/checkpoint-69500/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a7b6e54a6254ecb5c039650773e303d80e1178c
--- /dev/null
+++ b/checkpoint-69500/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bca69d6e74edb4d1fa3e9c45efbdb18d22e7412cb25b7cb947ef97719376c1f2
+size 627
diff --git a/checkpoint-58000/trainer_state.json b/checkpoint-69500/trainer_state.json
similarity index 90%
rename from checkpoint-58000/trainer_state.json
rename to checkpoint-69500/trainer_state.json
index 35f2bb833dfc31ae6f6c785565af1f3fb9273743..0615a024b721a7fc2b0a79069816431cfc3cd203 100644
--- a/checkpoint-58000/trainer_state.json
+++ b/checkpoint-69500/trainer_state.json
@@ -1,8 +1,9 @@
{
- "best_metric": 0.4916069805622101,
- "best_model_checkpoint": "./qlora-out/checkpoint-58000",
- "epoch": 2.1624846202602437,
- "global_step": 58000,
+ "best_metric": 0.47866225242614746,
+ "best_model_checkpoint": "./qlora-out/checkpoint-69500",
+ "epoch": 2.591253122553223,
+ "eval_steps": 500,
+ "global_step": 69500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
@@ -3950,11 +3951,375 @@
"eval_samples_per_second": 0.436,
"eval_steps_per_second": 0.436,
"step": 58000
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.576595345767464e-05,
+ "loss": 0.3759,
+ "step": 58100
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.5467166397551524e-05,
+ "loss": 0.3987,
+ "step": 58200
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.5169363338208094e-05,
+ "loss": 0.3809,
+ "step": 58300
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.4872548820564455e-05,
+ "loss": 0.3851,
+ "step": 58400
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.457672737046737e-05,
+ "loss": 0.3832,
+ "step": 58500
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.42819034986213e-05,
+ "loss": 0.3923,
+ "step": 58600
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 3.398808170051951e-05,
+ "loss": 0.3609,
+ "step": 58700
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 3.369526645637556e-05,
+ "loss": 0.3538,
+ "step": 58800
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 3.3403462231055107e-05,
+ "loss": 0.3941,
+ "step": 58900
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 3.3112673474007584e-05,
+ "loss": 0.3984,
+ "step": 59000
+ },
+ {
+ "epoch": 2.2,
+ "eval_loss": 0.4893116354942322,
+ "eval_runtime": 1243.7748,
+ "eval_samples_per_second": 0.436,
+ "eval_steps_per_second": 0.436,
+ "step": 59000
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 3.167411635594364e-05,
+ "loss": 0.3867,
+ "step": 59500
+ },
+ {
+ "epoch": 2.22,
+ "eval_loss": 0.48985520005226135,
+ "eval_runtime": 1240.4608,
+ "eval_samples_per_second": 0.437,
+ "eval_steps_per_second": 0.437,
+ "step": 59500
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 3.0261604379828834e-05,
+ "loss": 0.3736,
+ "step": 60000
+ },
+ {
+ "epoch": 2.24,
+ "eval_loss": 0.489548921585083,
+ "eval_runtime": 1234.7527,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 60000
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 2.887567598106955e-05,
+ "loss": 0.361,
+ "step": 60500
+ },
+ {
+ "epoch": 2.26,
+ "eval_loss": 0.4885287582874298,
+ "eval_runtime": 1231.4045,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 60500
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 2.7516859461678857e-05,
+ "loss": 0.3778,
+ "step": 61000
+ },
+ {
+ "epoch": 2.27,
+ "eval_loss": 0.4883672893047333,
+ "eval_runtime": 1235.8497,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 61000
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 2.618567278889328e-05,
+ "loss": 0.3791,
+ "step": 61500
+ },
+ {
+ "epoch": 2.29,
+ "eval_loss": 0.4874744415283203,
+ "eval_runtime": 1231.8195,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 61500
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 2.4882623397728655e-05,
+ "loss": 0.3705,
+ "step": 62000
+ },
+ {
+ "epoch": 2.31,
+ "eval_loss": 0.486933171749115,
+ "eval_runtime": 1227.5583,
+ "eval_samples_per_second": 0.442,
+ "eval_steps_per_second": 0.442,
+ "step": 62000
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 2.3608207997551255e-05,
+ "loss": 0.3698,
+ "step": 62500
+ },
+ {
+ "epoch": 2.33,
+ "eval_loss": 0.48592954874038696,
+ "eval_runtime": 1282.2531,
+ "eval_samples_per_second": 0.423,
+ "eval_steps_per_second": 0.423,
+ "step": 62500
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 2.2362912382736857e-05,
+ "loss": 0.381,
+ "step": 63000
+ },
+ {
+ "epoch": 2.35,
+ "eval_loss": 0.4852922856807709,
+ "eval_runtime": 1229.4457,
+ "eval_samples_per_second": 0.441,
+ "eval_steps_per_second": 0.441,
+ "step": 63000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 2.1147211247491084e-05,
+ "loss": 0.3728,
+ "step": 63500
+ },
+ {
+ "epoch": 2.37,
+ "eval_loss": 0.484967440366745,
+ "eval_runtime": 1296.2845,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 63500
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 1.9961568004900565e-05,
+ "loss": 0.3695,
+ "step": 64000
+ },
+ {
+ "epoch": 2.39,
+ "eval_loss": 0.4844016432762146,
+ "eval_runtime": 1317.5418,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 64000
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 1.8806434610284497e-05,
+ "loss": 0.3682,
+ "step": 64500
+ },
+ {
+ "epoch": 2.4,
+ "eval_loss": 0.4838670790195465,
+ "eval_runtime": 1337.5922,
+ "eval_samples_per_second": 0.405,
+ "eval_steps_per_second": 0.405,
+ "step": 64500
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 1.768225138891393e-05,
+ "loss": 0.3594,
+ "step": 65000
+ },
+ {
+ "epoch": 2.42,
+ "eval_loss": 0.48305046558380127,
+ "eval_runtime": 1317.2888,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 65000
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 1.6589446868164037e-05,
+ "loss": 0.367,
+ "step": 65500
+ },
+ {
+ "epoch": 2.44,
+ "eval_loss": 0.48225167393684387,
+ "eval_runtime": 1315.9763,
+ "eval_samples_per_second": 0.412,
+ "eval_steps_per_second": 0.412,
+ "step": 65500
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 1.552843761416395e-05,
+ "loss": 0.3781,
+ "step": 66000
+ },
+ {
+ "epoch": 2.46,
+ "eval_loss": 0.48182958364486694,
+ "eval_runtime": 1298.0711,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66000
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 1.4499628073005733e-05,
+ "loss": 0.3632,
+ "step": 66500
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 0.48136985301971436,
+ "eval_runtime": 1295.6256,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66500
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 1.350341041657378e-05,
+ "loss": 0.3707,
+ "step": 67000
+ },
+ {
+ "epoch": 2.5,
+ "eval_loss": 0.48081424832344055,
+ "eval_runtime": 1297.8801,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 67000
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 1.2540164393052622e-05,
+ "loss": 0.3657,
+ "step": 67500
+ },
+ {
+ "epoch": 2.52,
+ "eval_loss": 0.48031187057495117,
+ "eval_runtime": 1299.2471,
+ "eval_samples_per_second": 0.417,
+ "eval_steps_per_second": 0.417,
+ "step": 67500
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 1.1610257182170914e-05,
+ "loss": 0.3742,
+ "step": 68000
+ },
+ {
+ "epoch": 2.54,
+ "eval_loss": 0.479922354221344,
+ "eval_runtime": 1275.2567,
+ "eval_samples_per_second": 0.425,
+ "eval_steps_per_second": 0.425,
+ "step": 68000
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 1.0714043255236094e-05,
+ "loss": 0.3761,
+ "step": 68500
+ },
+ {
+ "epoch": 2.55,
+ "eval_loss": 0.4795922338962555,
+ "eval_runtime": 1321.5276,
+ "eval_samples_per_second": 0.41,
+ "eval_steps_per_second": 0.41,
+ "step": 68500
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 9.851864240013509e-06,
+ "loss": 0.3754,
+ "step": 69000
+ },
+ {
+ "epoch": 2.57,
+ "eval_loss": 0.4789520502090454,
+ "eval_runtime": 1345.4528,
+ "eval_samples_per_second": 0.403,
+ "eval_steps_per_second": 0.403,
+ "step": 69000
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 9.024048790501272e-06,
+ "loss": 0.3594,
+ "step": 69500
+ },
+ {
+ "epoch": 2.59,
+ "eval_loss": 0.47866225242614746,
+ "eval_runtime": 1316.9883,
+ "eval_samples_per_second": 0.412,
+ "eval_steps_per_second": 0.412,
+ "step": 69500
}
],
+ "logging_steps": 500,
"max_steps": 80463,
"num_train_epochs": 3,
- "total_flos": 1.6261229153876214e+19,
+ "save_steps": 500,
+ "total_flos": 1.950603151563399e+19,
"trial_name": null,
"trial_params": null
}
diff --git a/checkpoint-69500/training_args.bin b/checkpoint-69500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6
--- /dev/null
+++ b/checkpoint-69500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79
+size 4155
diff --git a/checkpoint-70000/README.md b/checkpoint-70000/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/checkpoint-70000/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/checkpoint-70000/adapter_config.json b/checkpoint-70000/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda
--- /dev/null
+++ b/checkpoint-70000/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 32,
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "v_proj",
+ "down_proj",
+ "k_proj",
+ "q_proj",
+ "up_proj",
+ "o_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-70000/adapter_model.bin b/checkpoint-70000/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..184610ccd8dd69ea3e5599df40a79e43410fef00
--- /dev/null
+++ b/checkpoint-70000/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c
+size 500897101
diff --git a/checkpoint-70000/adapter_model/README.md b/checkpoint-70000/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/checkpoint-70000/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/checkpoint-70000/adapter_model/adapter_config.json b/checkpoint-70000/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda
--- /dev/null
+++ b/checkpoint-70000/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 32,
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "v_proj",
+ "down_proj",
+ "k_proj",
+ "q_proj",
+ "up_proj",
+ "o_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-70000/adapter_model/adapter_model.bin b/checkpoint-70000/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..184610ccd8dd69ea3e5599df40a79e43410fef00
--- /dev/null
+++ b/checkpoint-70000/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c
+size 500897101
diff --git a/checkpoint-70000/optimizer.pt b/checkpoint-70000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37ca29a6fcd4e814d9f9f815a810e09760dcc0ac
--- /dev/null
+++ b/checkpoint-70000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3757834dca752ceb36448c74c65b6c698a3cf7eac3b443be1d20520a1ef75c80
+size 1001724605
diff --git a/checkpoint-70000/rng_state.pth b/checkpoint-70000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4fc38bc7b3e0e5a8ff782d8ddc9e8837d4da8a63
--- /dev/null
+++ b/checkpoint-70000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b4a721a0714cca4311a027981bf55d9c240a69a7f46c912f368eb795c5d17f
+size 14575
diff --git a/checkpoint-70000/scheduler.pt b/checkpoint-70000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aabf19f0c7bce27f0b6b563951dae9ee153127a8
--- /dev/null
+++ b/checkpoint-70000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d978803312071ed04341fcce57866c271d97c1ced7225c7be19f70453e4d9836
+size 627
diff --git a/checkpoint-57000/trainer_state.json b/checkpoint-70000/trainer_state.json
similarity index 88%
rename from checkpoint-57000/trainer_state.json
rename to checkpoint-70000/trainer_state.json
index d6f68f2aaf1f93c9324d725532c63d6f188b70b1..4286bc1591df5b2d35ef6a4f712eb3489bff2f85 100644
--- a/checkpoint-57000/trainer_state.json
+++ b/checkpoint-70000/trainer_state.json
@@ -1,8 +1,9 @@
{
- "best_metric": 0.49361398816108704,
- "best_model_checkpoint": "./qlora-out/checkpoint-57000",
- "epoch": 2.12520040266955,
- "global_step": 57000,
+ "best_metric": 0.47838443517684937,
+ "best_model_checkpoint": "./qlora-out/checkpoint-70000",
+ "epoch": 2.6098952313485704,
+ "eval_steps": 500,
+ "global_step": 70000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
@@ -3882,11 +3883,457 @@
"eval_samples_per_second": 0.449,
"eval_steps_per_second": 0.449,
"step": 57000
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 3.8806934461220826e-05,
+ "loss": 0.3512,
+ "step": 57100
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 3.8498560410244546e-05,
+ "loss": 0.3715,
+ "step": 57200
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 3.819112413715791e-05,
+ "loss": 0.3803,
+ "step": 57300
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 3.7884630329768875e-05,
+ "loss": 0.3785,
+ "step": 57400
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 3.757908366151463e-05,
+ "loss": 0.3626,
+ "step": 57500
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 3.72744887913904e-05,
+ "loss": 0.3981,
+ "step": 57600
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 3.697085036387822e-05,
+ "loss": 0.3918,
+ "step": 57700
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 3.6668173008876324e-05,
+ "loss": 0.3876,
+ "step": 57800
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 3.6366461341628396e-05,
+ "loss": 0.3878,
+ "step": 57900
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 3.606571996265321e-05,
+ "loss": 0.3674,
+ "step": 58000
+ },
+ {
+ "epoch": 2.16,
+ "eval_loss": 0.4916069805622101,
+ "eval_runtime": 1244.109,
+ "eval_samples_per_second": 0.436,
+ "eval_steps_per_second": 0.436,
+ "step": 58000
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.576595345767464e-05,
+ "loss": 0.3759,
+ "step": 58100
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.5467166397551524e-05,
+ "loss": 0.3987,
+ "step": 58200
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.5169363338208094e-05,
+ "loss": 0.3809,
+ "step": 58300
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.4872548820564455e-05,
+ "loss": 0.3851,
+ "step": 58400
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.457672737046737e-05,
+ "loss": 0.3832,
+ "step": 58500
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.42819034986213e-05,
+ "loss": 0.3923,
+ "step": 58600
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 3.398808170051951e-05,
+ "loss": 0.3609,
+ "step": 58700
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 3.369526645637556e-05,
+ "loss": 0.3538,
+ "step": 58800
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 3.3403462231055107e-05,
+ "loss": 0.3941,
+ "step": 58900
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 3.3112673474007584e-05,
+ "loss": 0.3984,
+ "step": 59000
+ },
+ {
+ "epoch": 2.2,
+ "eval_loss": 0.4893116354942322,
+ "eval_runtime": 1243.7748,
+ "eval_samples_per_second": 0.436,
+ "eval_steps_per_second": 0.436,
+ "step": 59000
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 3.167411635594364e-05,
+ "loss": 0.3867,
+ "step": 59500
+ },
+ {
+ "epoch": 2.22,
+ "eval_loss": 0.48985520005226135,
+ "eval_runtime": 1240.4608,
+ "eval_samples_per_second": 0.437,
+ "eval_steps_per_second": 0.437,
+ "step": 59500
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 3.0261604379828834e-05,
+ "loss": 0.3736,
+ "step": 60000
+ },
+ {
+ "epoch": 2.24,
+ "eval_loss": 0.489548921585083,
+ "eval_runtime": 1234.7527,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 60000
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 2.887567598106955e-05,
+ "loss": 0.361,
+ "step": 60500
+ },
+ {
+ "epoch": 2.26,
+ "eval_loss": 0.4885287582874298,
+ "eval_runtime": 1231.4045,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 60500
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 2.7516859461678857e-05,
+ "loss": 0.3778,
+ "step": 61000
+ },
+ {
+ "epoch": 2.27,
+ "eval_loss": 0.4883672893047333,
+ "eval_runtime": 1235.8497,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 61000
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 2.618567278889328e-05,
+ "loss": 0.3791,
+ "step": 61500
+ },
+ {
+ "epoch": 2.29,
+ "eval_loss": 0.4874744415283203,
+ "eval_runtime": 1231.8195,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 61500
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 2.4882623397728655e-05,
+ "loss": 0.3705,
+ "step": 62000
+ },
+ {
+ "epoch": 2.31,
+ "eval_loss": 0.486933171749115,
+ "eval_runtime": 1227.5583,
+ "eval_samples_per_second": 0.442,
+ "eval_steps_per_second": 0.442,
+ "step": 62000
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 2.3608207997551255e-05,
+ "loss": 0.3698,
+ "step": 62500
+ },
+ {
+ "epoch": 2.33,
+ "eval_loss": 0.48592954874038696,
+ "eval_runtime": 1282.2531,
+ "eval_samples_per_second": 0.423,
+ "eval_steps_per_second": 0.423,
+ "step": 62500
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 2.2362912382736857e-05,
+ "loss": 0.381,
+ "step": 63000
+ },
+ {
+ "epoch": 2.35,
+ "eval_loss": 0.4852922856807709,
+ "eval_runtime": 1229.4457,
+ "eval_samples_per_second": 0.441,
+ "eval_steps_per_second": 0.441,
+ "step": 63000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 2.1147211247491084e-05,
+ "loss": 0.3728,
+ "step": 63500
+ },
+ {
+ "epoch": 2.37,
+ "eval_loss": 0.484967440366745,
+ "eval_runtime": 1296.2845,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 63500
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 1.9961568004900565e-05,
+ "loss": 0.3695,
+ "step": 64000
+ },
+ {
+ "epoch": 2.39,
+ "eval_loss": 0.4844016432762146,
+ "eval_runtime": 1317.5418,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 64000
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 1.8806434610284497e-05,
+ "loss": 0.3682,
+ "step": 64500
+ },
+ {
+ "epoch": 2.4,
+ "eval_loss": 0.4838670790195465,
+ "eval_runtime": 1337.5922,
+ "eval_samples_per_second": 0.405,
+ "eval_steps_per_second": 0.405,
+ "step": 64500
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 1.768225138891393e-05,
+ "loss": 0.3594,
+ "step": 65000
+ },
+ {
+ "epoch": 2.42,
+ "eval_loss": 0.48305046558380127,
+ "eval_runtime": 1317.2888,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 65000
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 1.6589446868164037e-05,
+ "loss": 0.367,
+ "step": 65500
+ },
+ {
+ "epoch": 2.44,
+ "eval_loss": 0.48225167393684387,
+ "eval_runtime": 1315.9763,
+ "eval_samples_per_second": 0.412,
+ "eval_steps_per_second": 0.412,
+ "step": 65500
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 1.552843761416395e-05,
+ "loss": 0.3781,
+ "step": 66000
+ },
+ {
+ "epoch": 2.46,
+ "eval_loss": 0.48182958364486694,
+ "eval_runtime": 1298.0711,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66000
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 1.4499628073005733e-05,
+ "loss": 0.3632,
+ "step": 66500
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 0.48136985301971436,
+ "eval_runtime": 1295.6256,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66500
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 1.350341041657378e-05,
+ "loss": 0.3707,
+ "step": 67000
+ },
+ {
+ "epoch": 2.5,
+ "eval_loss": 0.48081424832344055,
+ "eval_runtime": 1297.8801,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 67000
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 1.2540164393052622e-05,
+ "loss": 0.3657,
+ "step": 67500
+ },
+ {
+ "epoch": 2.52,
+ "eval_loss": 0.48031187057495117,
+ "eval_runtime": 1299.2471,
+ "eval_samples_per_second": 0.417,
+ "eval_steps_per_second": 0.417,
+ "step": 67500
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 1.1610257182170914e-05,
+ "loss": 0.3742,
+ "step": 68000
+ },
+ {
+ "epoch": 2.54,
+ "eval_loss": 0.479922354221344,
+ "eval_runtime": 1275.2567,
+ "eval_samples_per_second": 0.425,
+ "eval_steps_per_second": 0.425,
+ "step": 68000
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 1.0714043255236094e-05,
+ "loss": 0.3761,
+ "step": 68500
+ },
+ {
+ "epoch": 2.55,
+ "eval_loss": 0.4795922338962555,
+ "eval_runtime": 1321.5276,
+ "eval_samples_per_second": 0.41,
+ "eval_steps_per_second": 0.41,
+ "step": 68500
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 9.851864240013509e-06,
+ "loss": 0.3754,
+ "step": 69000
+ },
+ {
+ "epoch": 2.57,
+ "eval_loss": 0.4789520502090454,
+ "eval_runtime": 1345.4528,
+ "eval_samples_per_second": 0.403,
+ "eval_steps_per_second": 0.403,
+ "step": 69000
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 9.024048790501272e-06,
+ "loss": 0.3594,
+ "step": 69500
+ },
+ {
+ "epoch": 2.59,
+ "eval_loss": 0.47866225242614746,
+ "eval_runtime": 1316.9883,
+ "eval_samples_per_second": 0.412,
+ "eval_steps_per_second": 0.412,
+ "step": 69500
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 8.230912461650797e-06,
+ "loss": 0.3601,
+ "step": 70000
+ },
+ {
+ "epoch": 2.61,
+ "eval_loss": 0.47838443517684937,
+ "eval_runtime": 1306.7325,
+ "eval_samples_per_second": 0.415,
+ "eval_steps_per_second": 0.415,
+ "step": 70000
}
],
+ "logging_steps": 500,
"max_steps": 80463,
"num_train_epochs": 3,
- "total_flos": 1.5981607298407956e+19,
+ "save_steps": 500,
+ "total_flos": 1.96476655962565e+19,
"trial_name": null,
"trial_params": null
}
diff --git a/checkpoint-70000/training_args.bin b/checkpoint-70000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6
--- /dev/null
+++ b/checkpoint-70000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79
+size 4155
diff --git a/checkpoint-70500/README.md b/checkpoint-70500/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/checkpoint-70500/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/checkpoint-70500/adapter_config.json b/checkpoint-70500/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda
--- /dev/null
+++ b/checkpoint-70500/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 32,
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "v_proj",
+ "down_proj",
+ "k_proj",
+ "q_proj",
+ "up_proj",
+ "o_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-70500/adapter_model.bin b/checkpoint-70500/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..65fc9cf1ea0ef35269a18503d87f4db78d9e319e
--- /dev/null
+++ b/checkpoint-70500/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
+size 500897101
diff --git a/checkpoint-70500/adapter_model/README.md b/checkpoint-70500/adapter_model/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6
--- /dev/null
+++ b/checkpoint-70500/adapter_model/README.md
@@ -0,0 +1,21 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.5.0.dev0
diff --git a/checkpoint-70500/adapter_model/adapter_config.json b/checkpoint-70500/adapter_model/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda
--- /dev/null
+++ b/checkpoint-70500/adapter_model/adapter_config.json
@@ -0,0 +1,26 @@
+{
+ "auto_mapping": null,
+ "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16",
+ "bias": "none",
+ "fan_in_fan_out": null,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 32,
+ "revision": null,
+ "target_modules": [
+ "gate_proj",
+ "v_proj",
+ "down_proj",
+ "k_proj",
+ "q_proj",
+ "up_proj",
+ "o_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+}
\ No newline at end of file
diff --git a/checkpoint-70500/adapter_model/adapter_model.bin b/checkpoint-70500/adapter_model/adapter_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..65fc9cf1ea0ef35269a18503d87f4db78d9e319e
--- /dev/null
+++ b/checkpoint-70500/adapter_model/adapter_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f
+size 500897101
diff --git a/checkpoint-70500/optimizer.pt b/checkpoint-70500/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cdc867391dc179d2218638bd9795ccbe393c93ee
--- /dev/null
+++ b/checkpoint-70500/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e892af212f732e9530af52f246a0ac8cce7e5fdd232039bb0e4c90cdd7fa3e74
+size 1001724605
diff --git a/checkpoint-70500/rng_state.pth b/checkpoint-70500/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..efa8eda190af8d0dd2138196404efa4c77c286c1
--- /dev/null
+++ b/checkpoint-70500/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a8693cacc78c05a4720cdf55aa732a0282b2cc8d97e8bde33f65f1b59bbf12e
+size 14575
diff --git a/checkpoint-70500/scheduler.pt b/checkpoint-70500/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9cd8cc5b98a47cb0df178180a370e80cbafd8e57
--- /dev/null
+++ b/checkpoint-70500/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d97c294b2bc2150ad9018c4136e33bcd18ab0fac2dca93dc8eff3b34e709e5be
+size 627
diff --git a/checkpoint-56000/trainer_state.json b/checkpoint-70500/trainer_state.json
similarity index 86%
rename from checkpoint-56000/trainer_state.json
rename to checkpoint-70500/trainer_state.json
index cb51e96cab8009017d097e82100123d384dacc6b..5495a790d7715c965c1a80f6a460922cf009acfc 100644
--- a/checkpoint-56000/trainer_state.json
+++ b/checkpoint-70500/trainer_state.json
@@ -1,8 +1,9 @@
{
- "best_metric": 0.49594032764434814,
- "best_model_checkpoint": "./qlora-out/checkpoint-56000",
- "epoch": 2.087916185078856,
- "global_step": 56000,
+ "best_metric": 0.4780386686325073,
+ "best_model_checkpoint": "./qlora-out/checkpoint-70500",
+ "epoch": 2.628537340143917,
+ "eval_steps": 500,
+ "global_step": 70500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
@@ -3814,11 +3815,539 @@
"eval_samples_per_second": 0.445,
"eval_steps_per_second": 0.445,
"step": 56000
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 4.194121130580594e-05,
+ "loss": 0.3779,
+ "step": 56100
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 4.1623720415556336e-05,
+ "loss": 0.3651,
+ "step": 56200
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 4.1307119650556494e-05,
+ "loss": 0.3754,
+ "step": 56300
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 4.099141383835512e-05,
+ "loss": 0.3887,
+ "step": 56400
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 4.067660779285465e-05,
+ "loss": 0.3739,
+ "step": 56500
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 4.036270631423781e-05,
+ "loss": 0.3842,
+ "step": 56600
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 4.004971418889447e-05,
+ "loss": 0.3723,
+ "step": 56700
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 3.9737636189348634e-05,
+ "loss": 0.3889,
+ "step": 56800
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 3.942647707418561e-05,
+ "loss": 0.3897,
+ "step": 56900
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 3.9116241587979496e-05,
+ "loss": 0.3592,
+ "step": 57000
+ },
+ {
+ "epoch": 2.13,
+ "eval_loss": 0.49361398816108704,
+ "eval_runtime": 1208.1063,
+ "eval_samples_per_second": 0.449,
+ "eval_steps_per_second": 0.449,
+ "step": 57000
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 3.8806934461220826e-05,
+ "loss": 0.3512,
+ "step": 57100
+ },
+ {
+ "epoch": 2.13,
+ "learning_rate": 3.8498560410244546e-05,
+ "loss": 0.3715,
+ "step": 57200
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 3.819112413715791e-05,
+ "loss": 0.3803,
+ "step": 57300
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 3.7884630329768875e-05,
+ "loss": 0.3785,
+ "step": 57400
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 3.757908366151463e-05,
+ "loss": 0.3626,
+ "step": 57500
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 3.72744887913904e-05,
+ "loss": 0.3981,
+ "step": 57600
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 3.697085036387822e-05,
+ "loss": 0.3918,
+ "step": 57700
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 3.6668173008876324e-05,
+ "loss": 0.3876,
+ "step": 57800
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 3.6366461341628396e-05,
+ "loss": 0.3878,
+ "step": 57900
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 3.606571996265321e-05,
+ "loss": 0.3674,
+ "step": 58000
+ },
+ {
+ "epoch": 2.16,
+ "eval_loss": 0.4916069805622101,
+ "eval_runtime": 1244.109,
+ "eval_samples_per_second": 0.436,
+ "eval_steps_per_second": 0.436,
+ "step": 58000
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.576595345767464e-05,
+ "loss": 0.3759,
+ "step": 58100
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.5467166397551524e-05,
+ "loss": 0.3987,
+ "step": 58200
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 3.5169363338208094e-05,
+ "loss": 0.3809,
+ "step": 58300
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.4872548820564455e-05,
+ "loss": 0.3851,
+ "step": 58400
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.457672737046737e-05,
+ "loss": 0.3832,
+ "step": 58500
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 3.42819034986213e-05,
+ "loss": 0.3923,
+ "step": 58600
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 3.398808170051951e-05,
+ "loss": 0.3609,
+ "step": 58700
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 3.369526645637556e-05,
+ "loss": 0.3538,
+ "step": 58800
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 3.3403462231055107e-05,
+ "loss": 0.3941,
+ "step": 58900
+ },
+ {
+ "epoch": 2.2,
+ "learning_rate": 3.3112673474007584e-05,
+ "loss": 0.3984,
+ "step": 59000
+ },
+ {
+ "epoch": 2.2,
+ "eval_loss": 0.4893116354942322,
+ "eval_runtime": 1243.7748,
+ "eval_samples_per_second": 0.436,
+ "eval_steps_per_second": 0.436,
+ "step": 59000
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 3.167411635594364e-05,
+ "loss": 0.3867,
+ "step": 59500
+ },
+ {
+ "epoch": 2.22,
+ "eval_loss": 0.48985520005226135,
+ "eval_runtime": 1240.4608,
+ "eval_samples_per_second": 0.437,
+ "eval_steps_per_second": 0.437,
+ "step": 59500
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 3.0261604379828834e-05,
+ "loss": 0.3736,
+ "step": 60000
+ },
+ {
+ "epoch": 2.24,
+ "eval_loss": 0.489548921585083,
+ "eval_runtime": 1234.7527,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 60000
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 2.887567598106955e-05,
+ "loss": 0.361,
+ "step": 60500
+ },
+ {
+ "epoch": 2.26,
+ "eval_loss": 0.4885287582874298,
+ "eval_runtime": 1231.4045,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 60500
+ },
+ {
+ "epoch": 2.27,
+ "learning_rate": 2.7516859461678857e-05,
+ "loss": 0.3778,
+ "step": 61000
+ },
+ {
+ "epoch": 2.27,
+ "eval_loss": 0.4883672893047333,
+ "eval_runtime": 1235.8497,
+ "eval_samples_per_second": 0.439,
+ "eval_steps_per_second": 0.439,
+ "step": 61000
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 2.618567278889328e-05,
+ "loss": 0.3791,
+ "step": 61500
+ },
+ {
+ "epoch": 2.29,
+ "eval_loss": 0.4874744415283203,
+ "eval_runtime": 1231.8195,
+ "eval_samples_per_second": 0.44,
+ "eval_steps_per_second": 0.44,
+ "step": 61500
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 2.4882623397728655e-05,
+ "loss": 0.3705,
+ "step": 62000
+ },
+ {
+ "epoch": 2.31,
+ "eval_loss": 0.486933171749115,
+ "eval_runtime": 1227.5583,
+ "eval_samples_per_second": 0.442,
+ "eval_steps_per_second": 0.442,
+ "step": 62000
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 2.3608207997551255e-05,
+ "loss": 0.3698,
+ "step": 62500
+ },
+ {
+ "epoch": 2.33,
+ "eval_loss": 0.48592954874038696,
+ "eval_runtime": 1282.2531,
+ "eval_samples_per_second": 0.423,
+ "eval_steps_per_second": 0.423,
+ "step": 62500
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 2.2362912382736857e-05,
+ "loss": 0.381,
+ "step": 63000
+ },
+ {
+ "epoch": 2.35,
+ "eval_loss": 0.4852922856807709,
+ "eval_runtime": 1229.4457,
+ "eval_samples_per_second": 0.441,
+ "eval_steps_per_second": 0.441,
+ "step": 63000
+ },
+ {
+ "epoch": 2.37,
+ "learning_rate": 2.1147211247491084e-05,
+ "loss": 0.3728,
+ "step": 63500
+ },
+ {
+ "epoch": 2.37,
+ "eval_loss": 0.484967440366745,
+ "eval_runtime": 1296.2845,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 63500
+ },
+ {
+ "epoch": 2.39,
+ "learning_rate": 1.9961568004900565e-05,
+ "loss": 0.3695,
+ "step": 64000
+ },
+ {
+ "epoch": 2.39,
+ "eval_loss": 0.4844016432762146,
+ "eval_runtime": 1317.5418,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 64000
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 1.8806434610284497e-05,
+ "loss": 0.3682,
+ "step": 64500
+ },
+ {
+ "epoch": 2.4,
+ "eval_loss": 0.4838670790195465,
+ "eval_runtime": 1337.5922,
+ "eval_samples_per_second": 0.405,
+ "eval_steps_per_second": 0.405,
+ "step": 64500
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 1.768225138891393e-05,
+ "loss": 0.3594,
+ "step": 65000
+ },
+ {
+ "epoch": 2.42,
+ "eval_loss": 0.48305046558380127,
+ "eval_runtime": 1317.2888,
+ "eval_samples_per_second": 0.411,
+ "eval_steps_per_second": 0.411,
+ "step": 65000
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 1.6589446868164037e-05,
+ "loss": 0.367,
+ "step": 65500
+ },
+ {
+ "epoch": 2.44,
+ "eval_loss": 0.48225167393684387,
+ "eval_runtime": 1315.9763,
+ "eval_samples_per_second": 0.412,
+ "eval_steps_per_second": 0.412,
+ "step": 65500
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 1.552843761416395e-05,
+ "loss": 0.3781,
+ "step": 66000
+ },
+ {
+ "epoch": 2.46,
+ "eval_loss": 0.48182958364486694,
+ "eval_runtime": 1298.0711,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66000
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 1.4499628073005733e-05,
+ "loss": 0.3632,
+ "step": 66500
+ },
+ {
+ "epoch": 2.48,
+ "eval_loss": 0.48136985301971436,
+ "eval_runtime": 1295.6256,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 66500
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 1.350341041657378e-05,
+ "loss": 0.3707,
+ "step": 67000
+ },
+ {
+ "epoch": 2.5,
+ "eval_loss": 0.48081424832344055,
+ "eval_runtime": 1297.8801,
+ "eval_samples_per_second": 0.418,
+ "eval_steps_per_second": 0.418,
+ "step": 67000
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 1.2540164393052622e-05,
+ "loss": 0.3657,
+ "step": 67500
+ },
+ {
+ "epoch": 2.52,
+ "eval_loss": 0.48031187057495117,
+ "eval_runtime": 1299.2471,
+ "eval_samples_per_second": 0.417,
+ "eval_steps_per_second": 0.417,
+ "step": 67500
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 1.1610257182170914e-05,
+ "loss": 0.3742,
+ "step": 68000
+ },
+ {
+ "epoch": 2.54,
+ "eval_loss": 0.479922354221344,
+ "eval_runtime": 1275.2567,
+ "eval_samples_per_second": 0.425,
+ "eval_steps_per_second": 0.425,
+ "step": 68000
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 1.0714043255236094e-05,
+ "loss": 0.3761,
+ "step": 68500
+ },
+ {
+ "epoch": 2.55,
+ "eval_loss": 0.4795922338962555,
+ "eval_runtime": 1321.5276,
+ "eval_samples_per_second": 0.41,
+ "eval_steps_per_second": 0.41,
+ "step": 68500
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 9.851864240013509e-06,
+ "loss": 0.3754,
+ "step": 69000
+ },
+ {
+ "epoch": 2.57,
+ "eval_loss": 0.4789520502090454,
+ "eval_runtime": 1345.4528,
+ "eval_samples_per_second": 0.403,
+ "eval_steps_per_second": 0.403,
+ "step": 69000
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 9.024048790501272e-06,
+ "loss": 0.3594,
+ "step": 69500
+ },
+ {
+ "epoch": 2.59,
+ "eval_loss": 0.47866225242614746,
+ "eval_runtime": 1316.9883,
+ "eval_samples_per_second": 0.412,
+ "eval_steps_per_second": 0.412,
+ "step": 69500
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 8.230912461650797e-06,
+ "loss": 0.3601,
+ "step": 70000
+ },
+ {
+ "epoch": 2.61,
+ "eval_loss": 0.47838443517684937,
+ "eval_runtime": 1306.7325,
+ "eval_samples_per_second": 0.415,
+ "eval_steps_per_second": 0.415,
+ "step": 70000
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 7.472757589080226e-06,
+ "loss": 0.3614,
+ "step": 70500
+ },
+ {
+ "epoch": 2.63,
+ "eval_loss": 0.4780386686325073,
+ "eval_runtime": 1290.4017,
+ "eval_samples_per_second": 0.42,
+ "eval_steps_per_second": 0.42,
+ "step": 70500
}
],
+ "logging_steps": 500,
"max_steps": 80463,
"num_train_epochs": 3,
- "total_flos": 1.5701320307350487e+19,
+ "save_steps": 500,
+ "total_flos": 1.978819419542102e+19,
"trial_name": null,
"trial_params": null
}
diff --git a/checkpoint-70500/training_args.bin b/checkpoint-70500/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6
--- /dev/null
+++ b/checkpoint-70500/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79
+size 4155
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,6 @@
+{
+ "bos_token": "",
+ "eos_token": "",
+ "pad_token": "[PAD]",
+ "unk_token": ""
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..929a285897b674a9e6f4208e87fdbaf8db10899d
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,36 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "legacy": null,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "tokenizer_class": "LlamaTokenizer",
+ "trust_remote_code": false,
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "use_fast": true
+}
diff --git a/training_args.bin b/training_args.bin
index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6 100644
--- a/training_args.bin
+++ b/training_args.bin
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70
-size 4027
+oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79
+size 4155