diff --git a/adapter_config.json b/adapter_config.json index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644 --- a/adapter_config.json +++ b/adapter_config.json @@ -14,12 +14,12 @@ "r": 32, "revision": null, "target_modules": [ - "up_proj", - "down_proj", - "q_proj", + "gate_proj", "v_proj", + "down_proj", "k_proj", - "gate_proj", + "q_proj", + "up_proj", "o_proj" ], "task_type": "CAUSAL_LM" diff --git a/adapter_model.bin b/adapter_model.bin index ac3b1522b1c7533a73f85bbe0bb6c7ee8f6b2132..65fc9cf1ea0ef35269a18503d87f4db78d9e319e 100644 --- a/adapter_model.bin +++ b/adapter_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f56b8a333605f03b496496aac3531e5eb50e390d67be06083619275a78de77da +oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f size 500897101 diff --git a/checkpoint-56000/adapter_model.bin b/checkpoint-56000/adapter_model.bin deleted file mode 100644 index 5660869c0b783f1993700a8f87cfc7179b9a6cdf..0000000000000000000000000000000000000000 --- a/checkpoint-56000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d61b2ab661f17f1b28e07a6ea4c559efd2487b69440f512fbda902147b2007f -size 500897101 diff --git a/checkpoint-56000/adapter_model/adapter_model.bin b/checkpoint-56000/adapter_model/adapter_model.bin deleted file mode 100644 index 5660869c0b783f1993700a8f87cfc7179b9a6cdf..0000000000000000000000000000000000000000 --- a/checkpoint-56000/adapter_model/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d61b2ab661f17f1b28e07a6ea4c559efd2487b69440f512fbda902147b2007f -size 500897101 diff --git a/checkpoint-56000/optimizer.pt b/checkpoint-56000/optimizer.pt deleted file mode 100644 index 7f861d2cbf487eb06ec9f15c270e9d165caa125c..0000000000000000000000000000000000000000 --- a/checkpoint-56000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0d0299416431a6687f29eb725bd8536e5bc5512ff27981755266d125bd960dc -size 1001723453 diff --git a/checkpoint-56000/rng_state.pth b/checkpoint-56000/rng_state.pth deleted file mode 100644 index c5889daf2f1eef23476d55d0aa0b6145f68cf00f..0000000000000000000000000000000000000000 --- a/checkpoint-56000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d223168e1cf1a8cbe3b356c3a0cf2b7c1b147eab84d53ec37dea66d1618867f6 -size 14575 diff --git a/checkpoint-56000/scheduler.pt b/checkpoint-56000/scheduler.pt deleted file mode 100644 index 1707969652b814b0689ecc48e49487d339c43f91..0000000000000000000000000000000000000000 --- a/checkpoint-56000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:40f07a3bf2b8b7e85bd7ec32b459bd8eba34e3ffd70129884ee8cac79708a84f -size 627 diff --git a/checkpoint-56000/training_args.bin b/checkpoint-56000/training_args.bin deleted file mode 100644 index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000 --- a/checkpoint-56000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70 -size 4027 diff --git a/checkpoint-57000/adapter_model.bin b/checkpoint-57000/adapter_model.bin deleted file mode 100644 index 9e7902152e7806e737b702ec14dcc5c30080cdda..0000000000000000000000000000000000000000 --- a/checkpoint-57000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9463fbc37a2c37f850b2aa713212bd675cce373b2a226f9fecf647f60157d1a1 -size 500897101 diff --git a/checkpoint-57000/adapter_model/adapter_model.bin b/checkpoint-57000/adapter_model/adapter_model.bin deleted file mode 100644 index 9e7902152e7806e737b702ec14dcc5c30080cdda..0000000000000000000000000000000000000000 --- a/checkpoint-57000/adapter_model/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9463fbc37a2c37f850b2aa713212bd675cce373b2a226f9fecf647f60157d1a1 -size 500897101 diff --git a/checkpoint-57000/optimizer.pt b/checkpoint-57000/optimizer.pt deleted file mode 100644 index 0d520db5dadf7329d5d4230fc36be8d0361c9ec9..0000000000000000000000000000000000000000 --- a/checkpoint-57000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d85e0cce4ea774ece1bba3b083129dd4ea4f075278346655fd271c9663edf7a0 -size 1001723453 diff --git a/checkpoint-57000/rng_state.pth b/checkpoint-57000/rng_state.pth deleted file mode 100644 index b1b5850951fe801890b55c988111ecc898d31225..0000000000000000000000000000000000000000 --- a/checkpoint-57000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e7b0ae395ccd0b4875fa94f8cd4ee3274662f44279f744979610604a15d72da0 -size 14575 diff --git a/checkpoint-57000/scheduler.pt b/checkpoint-57000/scheduler.pt deleted file mode 100644 index 2071308d81a1e00542811e957086cf96adb3de83..0000000000000000000000000000000000000000 --- a/checkpoint-57000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c18b73ff8e0ca9bda5d92134e841aafa154377e996a5dd3b1b1a3a0b329e74e -size 627 diff --git a/checkpoint-57000/training_args.bin b/checkpoint-57000/training_args.bin deleted file mode 100644 index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000 --- a/checkpoint-57000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70 -size 4027 diff --git a/checkpoint-58000/README.md b/checkpoint-58000/README.md deleted file mode 100644 index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..0000000000000000000000000000000000000000 --- a/checkpoint-58000/README.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.5.0.dev0 diff --git a/checkpoint-58000/adapter_config.json b/checkpoint-58000/adapter_config.json deleted file mode 100644 index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..0000000000000000000000000000000000000000 --- a/checkpoint-58000/adapter_config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "auto_mapping": null, - "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", - "bias": "none", - "fan_in_fan_out": null, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 16, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 32, - "revision": null, - "target_modules": [ - "up_proj", - "down_proj", - "q_proj", - "v_proj", - "k_proj", - "gate_proj", - "o_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/checkpoint-58000/adapter_model.bin b/checkpoint-58000/adapter_model.bin deleted file mode 100644 index 38268d0d52c3d2167e0d36ff92bf514b9a21f10e..0000000000000000000000000000000000000000 --- a/checkpoint-58000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf9efdf73d7ecc9f45ca166bec5b70555182c38338e6de139c6203b8a009fc59 -size 500897101 diff --git a/checkpoint-58000/adapter_model/README.md b/checkpoint-58000/adapter_model/README.md deleted file mode 100644 index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..0000000000000000000000000000000000000000 --- a/checkpoint-58000/adapter_model/README.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.5.0.dev0 diff --git a/checkpoint-58000/adapter_model/adapter_config.json b/checkpoint-58000/adapter_model/adapter_config.json deleted file mode 100644 index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..0000000000000000000000000000000000000000 --- a/checkpoint-58000/adapter_model/adapter_config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "auto_mapping": null, - "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", - "bias": "none", - "fan_in_fan_out": null, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 16, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 32, - "revision": null, - "target_modules": [ - "up_proj", - "down_proj", - "q_proj", - "v_proj", - "k_proj", - "gate_proj", - "o_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/checkpoint-58000/adapter_model/adapter_model.bin b/checkpoint-58000/adapter_model/adapter_model.bin deleted file mode 100644 index 38268d0d52c3d2167e0d36ff92bf514b9a21f10e..0000000000000000000000000000000000000000 --- a/checkpoint-58000/adapter_model/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf9efdf73d7ecc9f45ca166bec5b70555182c38338e6de139c6203b8a009fc59 -size 500897101 diff --git a/checkpoint-58000/optimizer.pt b/checkpoint-58000/optimizer.pt deleted file mode 100644 index 9bbb843998b8bf92c75b68973a7fdcbb9ce63d7f..0000000000000000000000000000000000000000 --- a/checkpoint-58000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67f192e31625a5f9d71aaeb75826e3461458c994c58bc8d3d5b3b59fa56efc4b -size 1001723453 diff --git a/checkpoint-58000/rng_state.pth b/checkpoint-58000/rng_state.pth deleted file mode 100644 index 70494ee8fdee686594723ae0f399ca93c60a4875..0000000000000000000000000000000000000000 --- a/checkpoint-58000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5923ba7d43395d2ab7a25af40d67f773d9e67e462f9250548814d4e4d1853054 -size 14575 diff --git a/checkpoint-58000/scheduler.pt b/checkpoint-58000/scheduler.pt deleted file mode 100644 index 2285fd7f23746958e4a8cb75acb768d7a2250aa7..0000000000000000000000000000000000000000 --- a/checkpoint-58000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eb6d23b542a910d4d880a9ad37544effe8607b091db3f3b955d778af0357176f -size 627 diff --git a/checkpoint-58000/training_args.bin b/checkpoint-58000/training_args.bin deleted file mode 100644 index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000 --- a/checkpoint-58000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70 -size 4027 diff --git a/checkpoint-59000/README.md b/checkpoint-59000/README.md deleted file mode 100644 index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..0000000000000000000000000000000000000000 --- a/checkpoint-59000/README.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.5.0.dev0 diff --git a/checkpoint-59000/adapter_config.json b/checkpoint-59000/adapter_config.json deleted file mode 100644 index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..0000000000000000000000000000000000000000 --- a/checkpoint-59000/adapter_config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "auto_mapping": null, - "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", - "bias": "none", - "fan_in_fan_out": null, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 16, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 32, - "revision": null, - "target_modules": [ - "up_proj", - "down_proj", - "q_proj", - "v_proj", - "k_proj", - "gate_proj", - "o_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/checkpoint-59000/adapter_model.bin b/checkpoint-59000/adapter_model.bin deleted file mode 100644 index ac3b1522b1c7533a73f85bbe0bb6c7ee8f6b2132..0000000000000000000000000000000000000000 --- a/checkpoint-59000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f56b8a333605f03b496496aac3531e5eb50e390d67be06083619275a78de77da -size 500897101 diff --git a/checkpoint-59000/optimizer.pt b/checkpoint-59000/optimizer.pt deleted file mode 100644 index aff4f19ffec18b9cc4623ef3a567549e1495e792..0000000000000000000000000000000000000000 --- a/checkpoint-59000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b26d7da827461914ac19ca3bc7f168368f4015c2a5364188dfe94a4e3cfde0cb -size 1001723453 diff --git a/checkpoint-59000/rng_state.pth b/checkpoint-59000/rng_state.pth deleted file mode 100644 index da05bf34ede76c80250c8b168b2b4a471506aa01..0000000000000000000000000000000000000000 --- a/checkpoint-59000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8bd334de4d3525ea70c0977c8fe7956563ce9e7d3af12dc2b9fcbbc68894cb2d -size 14575 diff --git a/checkpoint-59000/scheduler.pt b/checkpoint-59000/scheduler.pt deleted file mode 100644 index 51be63987e18fc71d49a0fe7cfb65e47982988f2..0000000000000000000000000000000000000000 --- a/checkpoint-59000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67aad82a87c2a78c7bf3dfc2188cc16487d1a53a6ab0632026c89faf1cd6731c -size 627 diff --git a/checkpoint-59000/training_args.bin b/checkpoint-59000/training_args.bin deleted file mode 100644 index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..0000000000000000000000000000000000000000 --- a/checkpoint-59000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70 -size 4027 diff --git a/checkpoint-56000/README.md b/checkpoint-69000/README.md similarity index 93% rename from checkpoint-56000/README.md rename to checkpoint-69000/README.md index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644 --- a/checkpoint-56000/README.md +++ b/checkpoint-69000/README.md @@ -5,6 +5,7 @@ library_name: peft The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes - load_in_8bit: False - load_in_4bit: True - llm_int8_threshold: 6.0 diff --git a/checkpoint-56000/adapter_model/adapter_config.json b/checkpoint-69000/adapter_config.json similarity index 100% rename from checkpoint-56000/adapter_model/adapter_config.json rename to checkpoint-69000/adapter_config.json index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644 --- a/checkpoint-56000/adapter_model/adapter_config.json +++ b/checkpoint-69000/adapter_config.json @@ -14,12 +14,12 @@ "r": 32, "revision": null, "target_modules": [ - "up_proj", - "down_proj", - "q_proj", + "gate_proj", "v_proj", + "down_proj", "k_proj", - "gate_proj", + "q_proj", + "up_proj", "o_proj" ], "task_type": "CAUSAL_LM" diff --git a/checkpoint-69000/adapter_model.bin b/checkpoint-69000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..71903bbe3394aef44445334a8aeaf8a5b325b36d --- /dev/null +++ b/checkpoint-69000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16127581d1b65765200af747a5c98d27b237b49430e306dfd23a9c3ad6af3b9c +size 500897101 diff --git a/checkpoint-57000/README.md b/checkpoint-69000/adapter_model/README.md similarity index 93% rename from checkpoint-57000/README.md rename to checkpoint-69000/adapter_model/README.md index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644 --- a/checkpoint-57000/README.md +++ b/checkpoint-69000/adapter_model/README.md @@ -5,6 +5,7 @@ library_name: peft The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes - load_in_8bit: False - load_in_4bit: True - llm_int8_threshold: 6.0 diff --git a/checkpoint-56000/adapter_config.json b/checkpoint-69000/adapter_model/adapter_config.json similarity index 100% rename from checkpoint-56000/adapter_config.json rename to checkpoint-69000/adapter_model/adapter_config.json index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644 --- a/checkpoint-56000/adapter_config.json +++ b/checkpoint-69000/adapter_model/adapter_config.json @@ -14,12 +14,12 @@ "r": 32, "revision": null, "target_modules": [ - "up_proj", - "down_proj", - "q_proj", + "gate_proj", "v_proj", + "down_proj", "k_proj", - "gate_proj", + "q_proj", + "up_proj", "o_proj" ], "task_type": "CAUSAL_LM" diff --git a/checkpoint-69000/adapter_model/adapter_model.bin b/checkpoint-69000/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..71903bbe3394aef44445334a8aeaf8a5b325b36d --- /dev/null +++ b/checkpoint-69000/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16127581d1b65765200af747a5c98d27b237b49430e306dfd23a9c3ad6af3b9c +size 500897101 diff --git a/checkpoint-69000/optimizer.pt b/checkpoint-69000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..50d7da297d84d8bcccbb2e41c8b73d63ed0c1b96 --- /dev/null +++ b/checkpoint-69000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52478f59ec5c65d4db6d79009fc0c477e003ba9db2b5648781779b6963bc40cb +size 1001724605 diff --git a/checkpoint-69000/rng_state.pth b/checkpoint-69000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8866aa3b38194844d50a80dfd7dcfead003f32da --- /dev/null +++ b/checkpoint-69000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7732edd0ae5999edb700e14bae64e828df5241beb83fbee05815f6c10b73570 +size 14575 diff --git a/checkpoint-69000/scheduler.pt b/checkpoint-69000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..437184049237e0e08f28edb326199e61b88e5ad7 --- /dev/null +++ b/checkpoint-69000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f5690258b17f07cbd583d2e586e1be27217d957aa1adadeb296ee58f808a87 +size 627 diff --git a/checkpoint-59000/trainer_state.json b/checkpoint-69000/trainer_state.json similarity index 92% rename from checkpoint-59000/trainer_state.json rename to checkpoint-69000/trainer_state.json index a574a20b178ba231bb7bbb0feab8d2272b572097..872a75702b83ffc0648cf4e59ff1bad375c34d07 100644 --- a/checkpoint-59000/trainer_state.json +++ b/checkpoint-69000/trainer_state.json @@ -1,8 +1,9 @@ { - "best_metric": 0.4893116354942322, - "best_model_checkpoint": "./qlora-out/checkpoint-59000", - "epoch": 2.1997688378509377, - "global_step": 59000, + "best_metric": 0.4789520502090454, + "best_model_checkpoint": "./qlora-out/checkpoint-69000", + "epoch": 2.5726110137578764, + "eval_steps": 500, + "global_step": 69000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -4018,11 +4019,293 @@ "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.436, "step": 59000 + }, + { + "epoch": 2.22, + "learning_rate": 3.167411635594364e-05, + "loss": 0.3867, + "step": 59500 + }, + { + "epoch": 2.22, + "eval_loss": 0.48985520005226135, + "eval_runtime": 1240.4608, + "eval_samples_per_second": 0.437, + "eval_steps_per_second": 0.437, + "step": 59500 + }, + { + "epoch": 2.24, + "learning_rate": 3.0261604379828834e-05, + "loss": 0.3736, + "step": 60000 + }, + { + "epoch": 2.24, + "eval_loss": 0.489548921585083, + "eval_runtime": 1234.7527, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 60000 + }, + { + "epoch": 2.26, + "learning_rate": 2.887567598106955e-05, + "loss": 0.361, + "step": 60500 + }, + { + "epoch": 2.26, + "eval_loss": 0.4885287582874298, + "eval_runtime": 1231.4045, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 60500 + }, + { + "epoch": 2.27, + "learning_rate": 2.7516859461678857e-05, + "loss": 0.3778, + "step": 61000 + }, + { + "epoch": 2.27, + "eval_loss": 0.4883672893047333, + "eval_runtime": 1235.8497, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 61000 + }, + { + "epoch": 2.29, + "learning_rate": 2.618567278889328e-05, + "loss": 0.3791, + "step": 61500 + }, + { + "epoch": 2.29, + "eval_loss": 0.4874744415283203, + "eval_runtime": 1231.8195, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 61500 + }, + { + "epoch": 2.31, + "learning_rate": 2.4882623397728655e-05, + "loss": 0.3705, + "step": 62000 + }, + { + "epoch": 2.31, + "eval_loss": 0.486933171749115, + "eval_runtime": 1227.5583, + "eval_samples_per_second": 0.442, + "eval_steps_per_second": 0.442, + "step": 62000 + }, + { + "epoch": 2.33, + "learning_rate": 2.3608207997551255e-05, + "loss": 0.3698, + "step": 62500 + }, + { + "epoch": 2.33, + "eval_loss": 0.48592954874038696, + "eval_runtime": 1282.2531, + "eval_samples_per_second": 0.423, + "eval_steps_per_second": 0.423, + "step": 62500 + }, + { + "epoch": 2.35, + "learning_rate": 2.2362912382736857e-05, + "loss": 0.381, + "step": 63000 + }, + { + "epoch": 2.35, + "eval_loss": 0.4852922856807709, + "eval_runtime": 1229.4457, + "eval_samples_per_second": 0.441, + "eval_steps_per_second": 0.441, + "step": 63000 + }, + { + "epoch": 2.37, + "learning_rate": 2.1147211247491084e-05, + "loss": 0.3728, + "step": 63500 + }, + { + "epoch": 2.37, + "eval_loss": 0.484967440366745, + "eval_runtime": 1296.2845, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 63500 + }, + { + "epoch": 2.39, + "learning_rate": 1.9961568004900565e-05, + "loss": 0.3695, + "step": 64000 + }, + { + "epoch": 2.39, + "eval_loss": 0.4844016432762146, + "eval_runtime": 1317.5418, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 64000 + }, + { + "epoch": 2.4, + "learning_rate": 1.8806434610284497e-05, + "loss": 0.3682, + "step": 64500 + }, + { + "epoch": 2.4, + "eval_loss": 0.4838670790195465, + "eval_runtime": 1337.5922, + "eval_samples_per_second": 0.405, + "eval_steps_per_second": 0.405, + "step": 64500 + }, + { + "epoch": 2.42, + "learning_rate": 1.768225138891393e-05, + "loss": 0.3594, + "step": 65000 + }, + { + "epoch": 2.42, + "eval_loss": 0.48305046558380127, + "eval_runtime": 1317.2888, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 65000 + }, + { + "epoch": 2.44, + "learning_rate": 1.6589446868164037e-05, + "loss": 0.367, + "step": 65500 + }, + { + "epoch": 2.44, + "eval_loss": 0.48225167393684387, + "eval_runtime": 1315.9763, + "eval_samples_per_second": 0.412, + "eval_steps_per_second": 0.412, + "step": 65500 + }, + { + "epoch": 2.46, + "learning_rate": 1.552843761416395e-05, + "loss": 0.3781, + "step": 66000 + }, + { + "epoch": 2.46, + "eval_loss": 0.48182958364486694, + "eval_runtime": 1298.0711, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66000 + }, + { + "epoch": 2.48, + "learning_rate": 1.4499628073005733e-05, + "loss": 0.3632, + "step": 66500 + }, + { + "epoch": 2.48, + "eval_loss": 0.48136985301971436, + "eval_runtime": 1295.6256, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66500 + }, + { + "epoch": 2.5, + "learning_rate": 1.350341041657378e-05, + "loss": 0.3707, + "step": 67000 + }, + { + "epoch": 2.5, + "eval_loss": 0.48081424832344055, + "eval_runtime": 1297.8801, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 67000 + }, + { + "epoch": 2.52, + "learning_rate": 1.2540164393052622e-05, + "loss": 0.3657, + "step": 67500 + }, + { + "epoch": 2.52, + "eval_loss": 0.48031187057495117, + "eval_runtime": 1299.2471, + "eval_samples_per_second": 0.417, + "eval_steps_per_second": 0.417, + "step": 67500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1610257182170914e-05, + "loss": 0.3742, + "step": 68000 + }, + { + "epoch": 2.54, + "eval_loss": 0.479922354221344, + "eval_runtime": 1275.2567, + "eval_samples_per_second": 0.425, + "eval_steps_per_second": 0.425, + "step": 68000 + }, + { + "epoch": 2.55, + "learning_rate": 1.0714043255236094e-05, + "loss": 0.3761, + "step": 68500 + }, + { + "epoch": 2.55, + "eval_loss": 0.4795922338962555, + "eval_runtime": 1321.5276, + "eval_samples_per_second": 0.41, + "eval_steps_per_second": 0.41, + "step": 68500 + }, + { + "epoch": 2.57, + "learning_rate": 9.851864240013509e-06, + "loss": 0.3754, + "step": 69000 + }, + { + "epoch": 2.57, + "eval_loss": 0.4789520502090454, + "eval_runtime": 1345.4528, + "eval_samples_per_second": 0.403, + "eval_steps_per_second": 0.403, + "step": 69000 } ], + "logging_steps": 500, "max_steps": 80463, "num_train_epochs": 3, - "total_flos": 1.6542001385066742e+19, + "save_steps": 500, + "total_flos": 1.9364073941589443e+19, "trial_name": null, "trial_params": null } diff --git a/checkpoint-69000/training_args.bin b/checkpoint-69000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6 --- /dev/null +++ b/checkpoint-69000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79 +size 4155 diff --git a/checkpoint-57000/adapter_model/README.md b/checkpoint-69500/README.md similarity index 93% rename from checkpoint-57000/adapter_model/README.md rename to checkpoint-69500/README.md index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644 --- a/checkpoint-57000/adapter_model/README.md +++ b/checkpoint-69500/README.md @@ -5,6 +5,7 @@ library_name: peft The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes - load_in_8bit: False - load_in_4bit: True - llm_int8_threshold: 6.0 diff --git a/checkpoint-57000/adapter_config.json b/checkpoint-69500/adapter_config.json similarity index 100% rename from checkpoint-57000/adapter_config.json rename to checkpoint-69500/adapter_config.json index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644 --- a/checkpoint-57000/adapter_config.json +++ b/checkpoint-69500/adapter_config.json @@ -14,12 +14,12 @@ "r": 32, "revision": null, "target_modules": [ - "up_proj", - "down_proj", - "q_proj", + "gate_proj", "v_proj", + "down_proj", "k_proj", - "gate_proj", + "q_proj", + "up_proj", "o_proj" ], "task_type": "CAUSAL_LM" diff --git a/checkpoint-69500/adapter_model.bin b/checkpoint-69500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8077f59997c7d8fa6a0a7c7e9292ff33a8107f9b --- /dev/null +++ b/checkpoint-69500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d536051f2a1ab536e6e716808efa406b8fc4bc641ebcf6102a663de9eab5ffe +size 500897101 diff --git a/checkpoint-56000/adapter_model/README.md b/checkpoint-69500/adapter_model/README.md similarity index 93% rename from checkpoint-56000/adapter_model/README.md rename to checkpoint-69500/adapter_model/README.md index f2208b0ded6c10ed47b2ea9df5ab7c8dd721a53c..f397922221c4a2f56d632b66d68ab92408f4d0f6 100644 --- a/checkpoint-56000/adapter_model/README.md +++ b/checkpoint-69500/adapter_model/README.md @@ -5,6 +5,7 @@ library_name: peft The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes - load_in_8bit: False - load_in_4bit: True - llm_int8_threshold: 6.0 diff --git a/checkpoint-57000/adapter_model/adapter_config.json b/checkpoint-69500/adapter_model/adapter_config.json similarity index 100% rename from checkpoint-57000/adapter_model/adapter_config.json rename to checkpoint-69500/adapter_model/adapter_config.json index 2de1cc0f033fef3955d6a6d0ed6bdd49d06426f2..173e5213955c8b23655ab5091de8362cacab7bda 100644 --- a/checkpoint-57000/adapter_model/adapter_config.json +++ b/checkpoint-69500/adapter_model/adapter_config.json @@ -14,12 +14,12 @@ "r": 32, "revision": null, "target_modules": [ - "up_proj", - "down_proj", - "q_proj", + "gate_proj", "v_proj", + "down_proj", "k_proj", - "gate_proj", + "q_proj", + "up_proj", "o_proj" ], "task_type": "CAUSAL_LM" diff --git a/checkpoint-69500/adapter_model/adapter_model.bin b/checkpoint-69500/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8077f59997c7d8fa6a0a7c7e9292ff33a8107f9b --- /dev/null +++ b/checkpoint-69500/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d536051f2a1ab536e6e716808efa406b8fc4bc641ebcf6102a663de9eab5ffe +size 500897101 diff --git a/checkpoint-69500/optimizer.pt b/checkpoint-69500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0fa7a9efdc2f860685085b06d5c19abedffc11a --- /dev/null +++ b/checkpoint-69500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0df421a10c3784a131b0ab37e1485ed063b6fa56024cc56104f9dbaad09ebe1 +size 1001724605 diff --git a/checkpoint-69500/rng_state.pth b/checkpoint-69500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..87f90c42022499cbddffc755d7034ebd31d186f5 --- /dev/null +++ b/checkpoint-69500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ee221e71303f97217b0d58a1364dcc9e4c1fac4ba0baf829b9e79b7ae1680b +size 14575 diff --git a/checkpoint-69500/scheduler.pt b/checkpoint-69500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a7b6e54a6254ecb5c039650773e303d80e1178c --- /dev/null +++ b/checkpoint-69500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca69d6e74edb4d1fa3e9c45efbdb18d22e7412cb25b7cb947ef97719376c1f2 +size 627 diff --git a/checkpoint-58000/trainer_state.json b/checkpoint-69500/trainer_state.json similarity index 90% rename from checkpoint-58000/trainer_state.json rename to checkpoint-69500/trainer_state.json index 35f2bb833dfc31ae6f6c785565af1f3fb9273743..0615a024b721a7fc2b0a79069816431cfc3cd203 100644 --- a/checkpoint-58000/trainer_state.json +++ b/checkpoint-69500/trainer_state.json @@ -1,8 +1,9 @@ { - "best_metric": 0.4916069805622101, - "best_model_checkpoint": "./qlora-out/checkpoint-58000", - "epoch": 2.1624846202602437, - "global_step": 58000, + "best_metric": 0.47866225242614746, + "best_model_checkpoint": "./qlora-out/checkpoint-69500", + "epoch": 2.591253122553223, + "eval_steps": 500, + "global_step": 69500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -3950,11 +3951,375 @@ "eval_samples_per_second": 0.436, "eval_steps_per_second": 0.436, "step": 58000 + }, + { + "epoch": 2.17, + "learning_rate": 3.576595345767464e-05, + "loss": 0.3759, + "step": 58100 + }, + { + "epoch": 2.17, + "learning_rate": 3.5467166397551524e-05, + "loss": 0.3987, + "step": 58200 + }, + { + "epoch": 2.17, + "learning_rate": 3.5169363338208094e-05, + "loss": 0.3809, + "step": 58300 + }, + { + "epoch": 2.18, + "learning_rate": 3.4872548820564455e-05, + "loss": 0.3851, + "step": 58400 + }, + { + "epoch": 2.18, + "learning_rate": 3.457672737046737e-05, + "loss": 0.3832, + "step": 58500 + }, + { + "epoch": 2.18, + "learning_rate": 3.42819034986213e-05, + "loss": 0.3923, + "step": 58600 + }, + { + "epoch": 2.19, + "learning_rate": 3.398808170051951e-05, + "loss": 0.3609, + "step": 58700 + }, + { + "epoch": 2.19, + "learning_rate": 3.369526645637556e-05, + "loss": 0.3538, + "step": 58800 + }, + { + "epoch": 2.2, + "learning_rate": 3.3403462231055107e-05, + "loss": 0.3941, + "step": 58900 + }, + { + "epoch": 2.2, + "learning_rate": 3.3112673474007584e-05, + "loss": 0.3984, + "step": 59000 + }, + { + "epoch": 2.2, + "eval_loss": 0.4893116354942322, + "eval_runtime": 1243.7748, + "eval_samples_per_second": 0.436, + "eval_steps_per_second": 0.436, + "step": 59000 + }, + { + "epoch": 2.22, + "learning_rate": 3.167411635594364e-05, + "loss": 0.3867, + "step": 59500 + }, + { + "epoch": 2.22, + "eval_loss": 0.48985520005226135, + "eval_runtime": 1240.4608, + "eval_samples_per_second": 0.437, + "eval_steps_per_second": 0.437, + "step": 59500 + }, + { + "epoch": 2.24, + "learning_rate": 3.0261604379828834e-05, + "loss": 0.3736, + "step": 60000 + }, + { + "epoch": 2.24, + "eval_loss": 0.489548921585083, + "eval_runtime": 1234.7527, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 60000 + }, + { + "epoch": 2.26, + "learning_rate": 2.887567598106955e-05, + "loss": 0.361, + "step": 60500 + }, + { + "epoch": 2.26, + "eval_loss": 0.4885287582874298, + "eval_runtime": 1231.4045, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 60500 + }, + { + "epoch": 2.27, + "learning_rate": 2.7516859461678857e-05, + "loss": 0.3778, + "step": 61000 + }, + { + "epoch": 2.27, + "eval_loss": 0.4883672893047333, + "eval_runtime": 1235.8497, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 61000 + }, + { + "epoch": 2.29, + "learning_rate": 2.618567278889328e-05, + "loss": 0.3791, + "step": 61500 + }, + { + "epoch": 2.29, + "eval_loss": 0.4874744415283203, + "eval_runtime": 1231.8195, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 61500 + }, + { + "epoch": 2.31, + "learning_rate": 2.4882623397728655e-05, + "loss": 0.3705, + "step": 62000 + }, + { + "epoch": 2.31, + "eval_loss": 0.486933171749115, + "eval_runtime": 1227.5583, + "eval_samples_per_second": 0.442, + "eval_steps_per_second": 0.442, + "step": 62000 + }, + { + "epoch": 2.33, + "learning_rate": 2.3608207997551255e-05, + "loss": 0.3698, + "step": 62500 + }, + { + "epoch": 2.33, + "eval_loss": 0.48592954874038696, + "eval_runtime": 1282.2531, + "eval_samples_per_second": 0.423, + "eval_steps_per_second": 0.423, + "step": 62500 + }, + { + "epoch": 2.35, + "learning_rate": 2.2362912382736857e-05, + "loss": 0.381, + "step": 63000 + }, + { + "epoch": 2.35, + "eval_loss": 0.4852922856807709, + "eval_runtime": 1229.4457, + "eval_samples_per_second": 0.441, + "eval_steps_per_second": 0.441, + "step": 63000 + }, + { + "epoch": 2.37, + "learning_rate": 2.1147211247491084e-05, + "loss": 0.3728, + "step": 63500 + }, + { + "epoch": 2.37, + "eval_loss": 0.484967440366745, + "eval_runtime": 1296.2845, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 63500 + }, + { + "epoch": 2.39, + "learning_rate": 1.9961568004900565e-05, + "loss": 0.3695, + "step": 64000 + }, + { + "epoch": 2.39, + "eval_loss": 0.4844016432762146, + "eval_runtime": 1317.5418, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 64000 + }, + { + "epoch": 2.4, + "learning_rate": 1.8806434610284497e-05, + "loss": 0.3682, + "step": 64500 + }, + { + "epoch": 2.4, + "eval_loss": 0.4838670790195465, + "eval_runtime": 1337.5922, + "eval_samples_per_second": 0.405, + "eval_steps_per_second": 0.405, + "step": 64500 + }, + { + "epoch": 2.42, + "learning_rate": 1.768225138891393e-05, + "loss": 0.3594, + "step": 65000 + }, + { + "epoch": 2.42, + "eval_loss": 0.48305046558380127, + "eval_runtime": 1317.2888, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 65000 + }, + { + "epoch": 2.44, + "learning_rate": 1.6589446868164037e-05, + "loss": 0.367, + "step": 65500 + }, + { + "epoch": 2.44, + "eval_loss": 0.48225167393684387, + "eval_runtime": 1315.9763, + "eval_samples_per_second": 0.412, + "eval_steps_per_second": 0.412, + "step": 65500 + }, + { + "epoch": 2.46, + "learning_rate": 1.552843761416395e-05, + "loss": 0.3781, + "step": 66000 + }, + { + "epoch": 2.46, + "eval_loss": 0.48182958364486694, + "eval_runtime": 1298.0711, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66000 + }, + { + "epoch": 2.48, + "learning_rate": 1.4499628073005733e-05, + "loss": 0.3632, + "step": 66500 + }, + { + "epoch": 2.48, + "eval_loss": 0.48136985301971436, + "eval_runtime": 1295.6256, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66500 + }, + { + "epoch": 2.5, + "learning_rate": 1.350341041657378e-05, + "loss": 0.3707, + "step": 67000 + }, + { + "epoch": 2.5, + "eval_loss": 0.48081424832344055, + "eval_runtime": 1297.8801, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 67000 + }, + { + "epoch": 2.52, + "learning_rate": 1.2540164393052622e-05, + "loss": 0.3657, + "step": 67500 + }, + { + "epoch": 2.52, + "eval_loss": 0.48031187057495117, + "eval_runtime": 1299.2471, + "eval_samples_per_second": 0.417, + "eval_steps_per_second": 0.417, + "step": 67500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1610257182170914e-05, + "loss": 0.3742, + "step": 68000 + }, + { + "epoch": 2.54, + "eval_loss": 0.479922354221344, + "eval_runtime": 1275.2567, + "eval_samples_per_second": 0.425, + "eval_steps_per_second": 0.425, + "step": 68000 + }, + { + "epoch": 2.55, + "learning_rate": 1.0714043255236094e-05, + "loss": 0.3761, + "step": 68500 + }, + { + "epoch": 2.55, + "eval_loss": 0.4795922338962555, + "eval_runtime": 1321.5276, + "eval_samples_per_second": 0.41, + "eval_steps_per_second": 0.41, + "step": 68500 + }, + { + "epoch": 2.57, + "learning_rate": 9.851864240013509e-06, + "loss": 0.3754, + "step": 69000 + }, + { + "epoch": 2.57, + "eval_loss": 0.4789520502090454, + "eval_runtime": 1345.4528, + "eval_samples_per_second": 0.403, + "eval_steps_per_second": 0.403, + "step": 69000 + }, + { + "epoch": 2.59, + "learning_rate": 9.024048790501272e-06, + "loss": 0.3594, + "step": 69500 + }, + { + "epoch": 2.59, + "eval_loss": 0.47866225242614746, + "eval_runtime": 1316.9883, + "eval_samples_per_second": 0.412, + "eval_steps_per_second": 0.412, + "step": 69500 } ], + "logging_steps": 500, "max_steps": 80463, "num_train_epochs": 3, - "total_flos": 1.6261229153876214e+19, + "save_steps": 500, + "total_flos": 1.950603151563399e+19, "trial_name": null, "trial_params": null } diff --git a/checkpoint-69500/training_args.bin b/checkpoint-69500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6 --- /dev/null +++ b/checkpoint-69500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79 +size 4155 diff --git a/checkpoint-70000/README.md b/checkpoint-70000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/checkpoint-70000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-70000/adapter_config.json b/checkpoint-70000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda --- /dev/null +++ b/checkpoint-70000/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "gate_proj", + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70000/adapter_model.bin b/checkpoint-70000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..184610ccd8dd69ea3e5599df40a79e43410fef00 --- /dev/null +++ b/checkpoint-70000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c +size 500897101 diff --git a/checkpoint-70000/adapter_model/README.md b/checkpoint-70000/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/checkpoint-70000/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-70000/adapter_model/adapter_config.json b/checkpoint-70000/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda --- /dev/null +++ b/checkpoint-70000/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "gate_proj", + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70000/adapter_model/adapter_model.bin b/checkpoint-70000/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..184610ccd8dd69ea3e5599df40a79e43410fef00 --- /dev/null +++ b/checkpoint-70000/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b27172603bfaa42af020910d0f3a0724656396738e74f39eebef1c4c53cd6c +size 500897101 diff --git a/checkpoint-70000/optimizer.pt b/checkpoint-70000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..37ca29a6fcd4e814d9f9f815a810e09760dcc0ac --- /dev/null +++ b/checkpoint-70000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3757834dca752ceb36448c74c65b6c698a3cf7eac3b443be1d20520a1ef75c80 +size 1001724605 diff --git a/checkpoint-70000/rng_state.pth b/checkpoint-70000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fc38bc7b3e0e5a8ff782d8ddc9e8837d4da8a63 --- /dev/null +++ b/checkpoint-70000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b4a721a0714cca4311a027981bf55d9c240a69a7f46c912f368eb795c5d17f +size 14575 diff --git a/checkpoint-70000/scheduler.pt b/checkpoint-70000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aabf19f0c7bce27f0b6b563951dae9ee153127a8 --- /dev/null +++ b/checkpoint-70000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d978803312071ed04341fcce57866c271d97c1ced7225c7be19f70453e4d9836 +size 627 diff --git a/checkpoint-57000/trainer_state.json b/checkpoint-70000/trainer_state.json similarity index 88% rename from checkpoint-57000/trainer_state.json rename to checkpoint-70000/trainer_state.json index d6f68f2aaf1f93c9324d725532c63d6f188b70b1..4286bc1591df5b2d35ef6a4f712eb3489bff2f85 100644 --- a/checkpoint-57000/trainer_state.json +++ b/checkpoint-70000/trainer_state.json @@ -1,8 +1,9 @@ { - "best_metric": 0.49361398816108704, - "best_model_checkpoint": "./qlora-out/checkpoint-57000", - "epoch": 2.12520040266955, - "global_step": 57000, + "best_metric": 0.47838443517684937, + "best_model_checkpoint": "./qlora-out/checkpoint-70000", + "epoch": 2.6098952313485704, + "eval_steps": 500, + "global_step": 70000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -3882,11 +3883,457 @@ "eval_samples_per_second": 0.449, "eval_steps_per_second": 0.449, "step": 57000 + }, + { + "epoch": 2.13, + "learning_rate": 3.8806934461220826e-05, + "loss": 0.3512, + "step": 57100 + }, + { + "epoch": 2.13, + "learning_rate": 3.8498560410244546e-05, + "loss": 0.3715, + "step": 57200 + }, + { + "epoch": 2.14, + "learning_rate": 3.819112413715791e-05, + "loss": 0.3803, + "step": 57300 + }, + { + "epoch": 2.14, + "learning_rate": 3.7884630329768875e-05, + "loss": 0.3785, + "step": 57400 + }, + { + "epoch": 2.14, + "learning_rate": 3.757908366151463e-05, + "loss": 0.3626, + "step": 57500 + }, + { + "epoch": 2.15, + "learning_rate": 3.72744887913904e-05, + "loss": 0.3981, + "step": 57600 + }, + { + "epoch": 2.15, + "learning_rate": 3.697085036387822e-05, + "loss": 0.3918, + "step": 57700 + }, + { + "epoch": 2.16, + "learning_rate": 3.6668173008876324e-05, + "loss": 0.3876, + "step": 57800 + }, + { + "epoch": 2.16, + "learning_rate": 3.6366461341628396e-05, + "loss": 0.3878, + "step": 57900 + }, + { + "epoch": 2.16, + "learning_rate": 3.606571996265321e-05, + "loss": 0.3674, + "step": 58000 + }, + { + "epoch": 2.16, + "eval_loss": 0.4916069805622101, + "eval_runtime": 1244.109, + "eval_samples_per_second": 0.436, + "eval_steps_per_second": 0.436, + "step": 58000 + }, + { + "epoch": 2.17, + "learning_rate": 3.576595345767464e-05, + "loss": 0.3759, + "step": 58100 + }, + { + "epoch": 2.17, + "learning_rate": 3.5467166397551524e-05, + "loss": 0.3987, + "step": 58200 + }, + { + "epoch": 2.17, + "learning_rate": 3.5169363338208094e-05, + "loss": 0.3809, + "step": 58300 + }, + { + "epoch": 2.18, + "learning_rate": 3.4872548820564455e-05, + "loss": 0.3851, + "step": 58400 + }, + { + "epoch": 2.18, + "learning_rate": 3.457672737046737e-05, + "loss": 0.3832, + "step": 58500 + }, + { + "epoch": 2.18, + "learning_rate": 3.42819034986213e-05, + "loss": 0.3923, + "step": 58600 + }, + { + "epoch": 2.19, + "learning_rate": 3.398808170051951e-05, + "loss": 0.3609, + "step": 58700 + }, + { + "epoch": 2.19, + "learning_rate": 3.369526645637556e-05, + "loss": 0.3538, + "step": 58800 + }, + { + "epoch": 2.2, + "learning_rate": 3.3403462231055107e-05, + "loss": 0.3941, + "step": 58900 + }, + { + "epoch": 2.2, + "learning_rate": 3.3112673474007584e-05, + "loss": 0.3984, + "step": 59000 + }, + { + "epoch": 2.2, + "eval_loss": 0.4893116354942322, + "eval_runtime": 1243.7748, + "eval_samples_per_second": 0.436, + "eval_steps_per_second": 0.436, + "step": 59000 + }, + { + "epoch": 2.22, + "learning_rate": 3.167411635594364e-05, + "loss": 0.3867, + "step": 59500 + }, + { + "epoch": 2.22, + "eval_loss": 0.48985520005226135, + "eval_runtime": 1240.4608, + "eval_samples_per_second": 0.437, + "eval_steps_per_second": 0.437, + "step": 59500 + }, + { + "epoch": 2.24, + "learning_rate": 3.0261604379828834e-05, + "loss": 0.3736, + "step": 60000 + }, + { + "epoch": 2.24, + "eval_loss": 0.489548921585083, + "eval_runtime": 1234.7527, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 60000 + }, + { + "epoch": 2.26, + "learning_rate": 2.887567598106955e-05, + "loss": 0.361, + "step": 60500 + }, + { + "epoch": 2.26, + "eval_loss": 0.4885287582874298, + "eval_runtime": 1231.4045, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 60500 + }, + { + "epoch": 2.27, + "learning_rate": 2.7516859461678857e-05, + "loss": 0.3778, + "step": 61000 + }, + { + "epoch": 2.27, + "eval_loss": 0.4883672893047333, + "eval_runtime": 1235.8497, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 61000 + }, + { + "epoch": 2.29, + "learning_rate": 2.618567278889328e-05, + "loss": 0.3791, + "step": 61500 + }, + { + "epoch": 2.29, + "eval_loss": 0.4874744415283203, + "eval_runtime": 1231.8195, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 61500 + }, + { + "epoch": 2.31, + "learning_rate": 2.4882623397728655e-05, + "loss": 0.3705, + "step": 62000 + }, + { + "epoch": 2.31, + "eval_loss": 0.486933171749115, + "eval_runtime": 1227.5583, + "eval_samples_per_second": 0.442, + "eval_steps_per_second": 0.442, + "step": 62000 + }, + { + "epoch": 2.33, + "learning_rate": 2.3608207997551255e-05, + "loss": 0.3698, + "step": 62500 + }, + { + "epoch": 2.33, + "eval_loss": 0.48592954874038696, + "eval_runtime": 1282.2531, + "eval_samples_per_second": 0.423, + "eval_steps_per_second": 0.423, + "step": 62500 + }, + { + "epoch": 2.35, + "learning_rate": 2.2362912382736857e-05, + "loss": 0.381, + "step": 63000 + }, + { + "epoch": 2.35, + "eval_loss": 0.4852922856807709, + "eval_runtime": 1229.4457, + "eval_samples_per_second": 0.441, + "eval_steps_per_second": 0.441, + "step": 63000 + }, + { + "epoch": 2.37, + "learning_rate": 2.1147211247491084e-05, + "loss": 0.3728, + "step": 63500 + }, + { + "epoch": 2.37, + "eval_loss": 0.484967440366745, + "eval_runtime": 1296.2845, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 63500 + }, + { + "epoch": 2.39, + "learning_rate": 1.9961568004900565e-05, + "loss": 0.3695, + "step": 64000 + }, + { + "epoch": 2.39, + "eval_loss": 0.4844016432762146, + "eval_runtime": 1317.5418, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 64000 + }, + { + "epoch": 2.4, + "learning_rate": 1.8806434610284497e-05, + "loss": 0.3682, + "step": 64500 + }, + { + "epoch": 2.4, + "eval_loss": 0.4838670790195465, + "eval_runtime": 1337.5922, + "eval_samples_per_second": 0.405, + "eval_steps_per_second": 0.405, + "step": 64500 + }, + { + "epoch": 2.42, + "learning_rate": 1.768225138891393e-05, + "loss": 0.3594, + "step": 65000 + }, + { + "epoch": 2.42, + "eval_loss": 0.48305046558380127, + "eval_runtime": 1317.2888, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 65000 + }, + { + "epoch": 2.44, + "learning_rate": 1.6589446868164037e-05, + "loss": 0.367, + "step": 65500 + }, + { + "epoch": 2.44, + "eval_loss": 0.48225167393684387, + "eval_runtime": 1315.9763, + "eval_samples_per_second": 0.412, + "eval_steps_per_second": 0.412, + "step": 65500 + }, + { + "epoch": 2.46, + "learning_rate": 1.552843761416395e-05, + "loss": 0.3781, + "step": 66000 + }, + { + "epoch": 2.46, + "eval_loss": 0.48182958364486694, + "eval_runtime": 1298.0711, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66000 + }, + { + "epoch": 2.48, + "learning_rate": 1.4499628073005733e-05, + "loss": 0.3632, + "step": 66500 + }, + { + "epoch": 2.48, + "eval_loss": 0.48136985301971436, + "eval_runtime": 1295.6256, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66500 + }, + { + "epoch": 2.5, + "learning_rate": 1.350341041657378e-05, + "loss": 0.3707, + "step": 67000 + }, + { + "epoch": 2.5, + "eval_loss": 0.48081424832344055, + "eval_runtime": 1297.8801, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 67000 + }, + { + "epoch": 2.52, + "learning_rate": 1.2540164393052622e-05, + "loss": 0.3657, + "step": 67500 + }, + { + "epoch": 2.52, + "eval_loss": 0.48031187057495117, + "eval_runtime": 1299.2471, + "eval_samples_per_second": 0.417, + "eval_steps_per_second": 0.417, + "step": 67500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1610257182170914e-05, + "loss": 0.3742, + "step": 68000 + }, + { + "epoch": 2.54, + "eval_loss": 0.479922354221344, + "eval_runtime": 1275.2567, + "eval_samples_per_second": 0.425, + "eval_steps_per_second": 0.425, + "step": 68000 + }, + { + "epoch": 2.55, + "learning_rate": 1.0714043255236094e-05, + "loss": 0.3761, + "step": 68500 + }, + { + "epoch": 2.55, + "eval_loss": 0.4795922338962555, + "eval_runtime": 1321.5276, + "eval_samples_per_second": 0.41, + "eval_steps_per_second": 0.41, + "step": 68500 + }, + { + "epoch": 2.57, + "learning_rate": 9.851864240013509e-06, + "loss": 0.3754, + "step": 69000 + }, + { + "epoch": 2.57, + "eval_loss": 0.4789520502090454, + "eval_runtime": 1345.4528, + "eval_samples_per_second": 0.403, + "eval_steps_per_second": 0.403, + "step": 69000 + }, + { + "epoch": 2.59, + "learning_rate": 9.024048790501272e-06, + "loss": 0.3594, + "step": 69500 + }, + { + "epoch": 2.59, + "eval_loss": 0.47866225242614746, + "eval_runtime": 1316.9883, + "eval_samples_per_second": 0.412, + "eval_steps_per_second": 0.412, + "step": 69500 + }, + { + "epoch": 2.61, + "learning_rate": 8.230912461650797e-06, + "loss": 0.3601, + "step": 70000 + }, + { + "epoch": 2.61, + "eval_loss": 0.47838443517684937, + "eval_runtime": 1306.7325, + "eval_samples_per_second": 0.415, + "eval_steps_per_second": 0.415, + "step": 70000 } ], + "logging_steps": 500, "max_steps": 80463, "num_train_epochs": 3, - "total_flos": 1.5981607298407956e+19, + "save_steps": 500, + "total_flos": 1.96476655962565e+19, "trial_name": null, "trial_params": null } diff --git a/checkpoint-70000/training_args.bin b/checkpoint-70000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6 --- /dev/null +++ b/checkpoint-70000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79 +size 4155 diff --git a/checkpoint-70500/README.md b/checkpoint-70500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/checkpoint-70500/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-70500/adapter_config.json b/checkpoint-70500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda --- /dev/null +++ b/checkpoint-70500/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "gate_proj", + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70500/adapter_model.bin b/checkpoint-70500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..65fc9cf1ea0ef35269a18503d87f4db78d9e319e --- /dev/null +++ b/checkpoint-70500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f +size 500897101 diff --git a/checkpoint-70500/adapter_model/README.md b/checkpoint-70500/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/checkpoint-70500/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/checkpoint-70500/adapter_model/adapter_config.json b/checkpoint-70500/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..173e5213955c8b23655ab5091de8362cacab7bda --- /dev/null +++ b/checkpoint-70500/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "/workspace/webui/models/TheBloke_Llama-2-13B-fp16", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "revision": null, + "target_modules": [ + "gate_proj", + "v_proj", + "down_proj", + "k_proj", + "q_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-70500/adapter_model/adapter_model.bin b/checkpoint-70500/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..65fc9cf1ea0ef35269a18503d87f4db78d9e319e --- /dev/null +++ b/checkpoint-70500/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a26259b6c7f10eacd37169a51779a24aa9d6a76d8fdef027422bdcbf2557c2f +size 500897101 diff --git a/checkpoint-70500/optimizer.pt b/checkpoint-70500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdc867391dc179d2218638bd9795ccbe393c93ee --- /dev/null +++ b/checkpoint-70500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e892af212f732e9530af52f246a0ac8cce7e5fdd232039bb0e4c90cdd7fa3e74 +size 1001724605 diff --git a/checkpoint-70500/rng_state.pth b/checkpoint-70500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..efa8eda190af8d0dd2138196404efa4c77c286c1 --- /dev/null +++ b/checkpoint-70500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a8693cacc78c05a4720cdf55aa732a0282b2cc8d97e8bde33f65f1b59bbf12e +size 14575 diff --git a/checkpoint-70500/scheduler.pt b/checkpoint-70500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cd8cc5b98a47cb0df178180a370e80cbafd8e57 --- /dev/null +++ b/checkpoint-70500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d97c294b2bc2150ad9018c4136e33bcd18ab0fac2dca93dc8eff3b34e709e5be +size 627 diff --git a/checkpoint-56000/trainer_state.json b/checkpoint-70500/trainer_state.json similarity index 86% rename from checkpoint-56000/trainer_state.json rename to checkpoint-70500/trainer_state.json index cb51e96cab8009017d097e82100123d384dacc6b..5495a790d7715c965c1a80f6a460922cf009acfc 100644 --- a/checkpoint-56000/trainer_state.json +++ b/checkpoint-70500/trainer_state.json @@ -1,8 +1,9 @@ { - "best_metric": 0.49594032764434814, - "best_model_checkpoint": "./qlora-out/checkpoint-56000", - "epoch": 2.087916185078856, - "global_step": 56000, + "best_metric": 0.4780386686325073, + "best_model_checkpoint": "./qlora-out/checkpoint-70500", + "epoch": 2.628537340143917, + "eval_steps": 500, + "global_step": 70500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -3814,11 +3815,539 @@ "eval_samples_per_second": 0.445, "eval_steps_per_second": 0.445, "step": 56000 + }, + { + "epoch": 2.09, + "learning_rate": 4.194121130580594e-05, + "loss": 0.3779, + "step": 56100 + }, + { + "epoch": 2.1, + "learning_rate": 4.1623720415556336e-05, + "loss": 0.3651, + "step": 56200 + }, + { + "epoch": 2.1, + "learning_rate": 4.1307119650556494e-05, + "loss": 0.3754, + "step": 56300 + }, + { + "epoch": 2.1, + "learning_rate": 4.099141383835512e-05, + "loss": 0.3887, + "step": 56400 + }, + { + "epoch": 2.11, + "learning_rate": 4.067660779285465e-05, + "loss": 0.3739, + "step": 56500 + }, + { + "epoch": 2.11, + "learning_rate": 4.036270631423781e-05, + "loss": 0.3842, + "step": 56600 + }, + { + "epoch": 2.11, + "learning_rate": 4.004971418889447e-05, + "loss": 0.3723, + "step": 56700 + }, + { + "epoch": 2.12, + "learning_rate": 3.9737636189348634e-05, + "loss": 0.3889, + "step": 56800 + }, + { + "epoch": 2.12, + "learning_rate": 3.942647707418561e-05, + "loss": 0.3897, + "step": 56900 + }, + { + "epoch": 2.13, + "learning_rate": 3.9116241587979496e-05, + "loss": 0.3592, + "step": 57000 + }, + { + "epoch": 2.13, + "eval_loss": 0.49361398816108704, + "eval_runtime": 1208.1063, + "eval_samples_per_second": 0.449, + "eval_steps_per_second": 0.449, + "step": 57000 + }, + { + "epoch": 2.13, + "learning_rate": 3.8806934461220826e-05, + "loss": 0.3512, + "step": 57100 + }, + { + "epoch": 2.13, + "learning_rate": 3.8498560410244546e-05, + "loss": 0.3715, + "step": 57200 + }, + { + "epoch": 2.14, + "learning_rate": 3.819112413715791e-05, + "loss": 0.3803, + "step": 57300 + }, + { + "epoch": 2.14, + "learning_rate": 3.7884630329768875e-05, + "loss": 0.3785, + "step": 57400 + }, + { + "epoch": 2.14, + "learning_rate": 3.757908366151463e-05, + "loss": 0.3626, + "step": 57500 + }, + { + "epoch": 2.15, + "learning_rate": 3.72744887913904e-05, + "loss": 0.3981, + "step": 57600 + }, + { + "epoch": 2.15, + "learning_rate": 3.697085036387822e-05, + "loss": 0.3918, + "step": 57700 + }, + { + "epoch": 2.16, + "learning_rate": 3.6668173008876324e-05, + "loss": 0.3876, + "step": 57800 + }, + { + "epoch": 2.16, + "learning_rate": 3.6366461341628396e-05, + "loss": 0.3878, + "step": 57900 + }, + { + "epoch": 2.16, + "learning_rate": 3.606571996265321e-05, + "loss": 0.3674, + "step": 58000 + }, + { + "epoch": 2.16, + "eval_loss": 0.4916069805622101, + "eval_runtime": 1244.109, + "eval_samples_per_second": 0.436, + "eval_steps_per_second": 0.436, + "step": 58000 + }, + { + "epoch": 2.17, + "learning_rate": 3.576595345767464e-05, + "loss": 0.3759, + "step": 58100 + }, + { + "epoch": 2.17, + "learning_rate": 3.5467166397551524e-05, + "loss": 0.3987, + "step": 58200 + }, + { + "epoch": 2.17, + "learning_rate": 3.5169363338208094e-05, + "loss": 0.3809, + "step": 58300 + }, + { + "epoch": 2.18, + "learning_rate": 3.4872548820564455e-05, + "loss": 0.3851, + "step": 58400 + }, + { + "epoch": 2.18, + "learning_rate": 3.457672737046737e-05, + "loss": 0.3832, + "step": 58500 + }, + { + "epoch": 2.18, + "learning_rate": 3.42819034986213e-05, + "loss": 0.3923, + "step": 58600 + }, + { + "epoch": 2.19, + "learning_rate": 3.398808170051951e-05, + "loss": 0.3609, + "step": 58700 + }, + { + "epoch": 2.19, + "learning_rate": 3.369526645637556e-05, + "loss": 0.3538, + "step": 58800 + }, + { + "epoch": 2.2, + "learning_rate": 3.3403462231055107e-05, + "loss": 0.3941, + "step": 58900 + }, + { + "epoch": 2.2, + "learning_rate": 3.3112673474007584e-05, + "loss": 0.3984, + "step": 59000 + }, + { + "epoch": 2.2, + "eval_loss": 0.4893116354942322, + "eval_runtime": 1243.7748, + "eval_samples_per_second": 0.436, + "eval_steps_per_second": 0.436, + "step": 59000 + }, + { + "epoch": 2.22, + "learning_rate": 3.167411635594364e-05, + "loss": 0.3867, + "step": 59500 + }, + { + "epoch": 2.22, + "eval_loss": 0.48985520005226135, + "eval_runtime": 1240.4608, + "eval_samples_per_second": 0.437, + "eval_steps_per_second": 0.437, + "step": 59500 + }, + { + "epoch": 2.24, + "learning_rate": 3.0261604379828834e-05, + "loss": 0.3736, + "step": 60000 + }, + { + "epoch": 2.24, + "eval_loss": 0.489548921585083, + "eval_runtime": 1234.7527, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 60000 + }, + { + "epoch": 2.26, + "learning_rate": 2.887567598106955e-05, + "loss": 0.361, + "step": 60500 + }, + { + "epoch": 2.26, + "eval_loss": 0.4885287582874298, + "eval_runtime": 1231.4045, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 60500 + }, + { + "epoch": 2.27, + "learning_rate": 2.7516859461678857e-05, + "loss": 0.3778, + "step": 61000 + }, + { + "epoch": 2.27, + "eval_loss": 0.4883672893047333, + "eval_runtime": 1235.8497, + "eval_samples_per_second": 0.439, + "eval_steps_per_second": 0.439, + "step": 61000 + }, + { + "epoch": 2.29, + "learning_rate": 2.618567278889328e-05, + "loss": 0.3791, + "step": 61500 + }, + { + "epoch": 2.29, + "eval_loss": 0.4874744415283203, + "eval_runtime": 1231.8195, + "eval_samples_per_second": 0.44, + "eval_steps_per_second": 0.44, + "step": 61500 + }, + { + "epoch": 2.31, + "learning_rate": 2.4882623397728655e-05, + "loss": 0.3705, + "step": 62000 + }, + { + "epoch": 2.31, + "eval_loss": 0.486933171749115, + "eval_runtime": 1227.5583, + "eval_samples_per_second": 0.442, + "eval_steps_per_second": 0.442, + "step": 62000 + }, + { + "epoch": 2.33, + "learning_rate": 2.3608207997551255e-05, + "loss": 0.3698, + "step": 62500 + }, + { + "epoch": 2.33, + "eval_loss": 0.48592954874038696, + "eval_runtime": 1282.2531, + "eval_samples_per_second": 0.423, + "eval_steps_per_second": 0.423, + "step": 62500 + }, + { + "epoch": 2.35, + "learning_rate": 2.2362912382736857e-05, + "loss": 0.381, + "step": 63000 + }, + { + "epoch": 2.35, + "eval_loss": 0.4852922856807709, + "eval_runtime": 1229.4457, + "eval_samples_per_second": 0.441, + "eval_steps_per_second": 0.441, + "step": 63000 + }, + { + "epoch": 2.37, + "learning_rate": 2.1147211247491084e-05, + "loss": 0.3728, + "step": 63500 + }, + { + "epoch": 2.37, + "eval_loss": 0.484967440366745, + "eval_runtime": 1296.2845, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 63500 + }, + { + "epoch": 2.39, + "learning_rate": 1.9961568004900565e-05, + "loss": 0.3695, + "step": 64000 + }, + { + "epoch": 2.39, + "eval_loss": 0.4844016432762146, + "eval_runtime": 1317.5418, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 64000 + }, + { + "epoch": 2.4, + "learning_rate": 1.8806434610284497e-05, + "loss": 0.3682, + "step": 64500 + }, + { + "epoch": 2.4, + "eval_loss": 0.4838670790195465, + "eval_runtime": 1337.5922, + "eval_samples_per_second": 0.405, + "eval_steps_per_second": 0.405, + "step": 64500 + }, + { + "epoch": 2.42, + "learning_rate": 1.768225138891393e-05, + "loss": 0.3594, + "step": 65000 + }, + { + "epoch": 2.42, + "eval_loss": 0.48305046558380127, + "eval_runtime": 1317.2888, + "eval_samples_per_second": 0.411, + "eval_steps_per_second": 0.411, + "step": 65000 + }, + { + "epoch": 2.44, + "learning_rate": 1.6589446868164037e-05, + "loss": 0.367, + "step": 65500 + }, + { + "epoch": 2.44, + "eval_loss": 0.48225167393684387, + "eval_runtime": 1315.9763, + "eval_samples_per_second": 0.412, + "eval_steps_per_second": 0.412, + "step": 65500 + }, + { + "epoch": 2.46, + "learning_rate": 1.552843761416395e-05, + "loss": 0.3781, + "step": 66000 + }, + { + "epoch": 2.46, + "eval_loss": 0.48182958364486694, + "eval_runtime": 1298.0711, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66000 + }, + { + "epoch": 2.48, + "learning_rate": 1.4499628073005733e-05, + "loss": 0.3632, + "step": 66500 + }, + { + "epoch": 2.48, + "eval_loss": 0.48136985301971436, + "eval_runtime": 1295.6256, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 66500 + }, + { + "epoch": 2.5, + "learning_rate": 1.350341041657378e-05, + "loss": 0.3707, + "step": 67000 + }, + { + "epoch": 2.5, + "eval_loss": 0.48081424832344055, + "eval_runtime": 1297.8801, + "eval_samples_per_second": 0.418, + "eval_steps_per_second": 0.418, + "step": 67000 + }, + { + "epoch": 2.52, + "learning_rate": 1.2540164393052622e-05, + "loss": 0.3657, + "step": 67500 + }, + { + "epoch": 2.52, + "eval_loss": 0.48031187057495117, + "eval_runtime": 1299.2471, + "eval_samples_per_second": 0.417, + "eval_steps_per_second": 0.417, + "step": 67500 + }, + { + "epoch": 2.54, + "learning_rate": 1.1610257182170914e-05, + "loss": 0.3742, + "step": 68000 + }, + { + "epoch": 2.54, + "eval_loss": 0.479922354221344, + "eval_runtime": 1275.2567, + "eval_samples_per_second": 0.425, + "eval_steps_per_second": 0.425, + "step": 68000 + }, + { + "epoch": 2.55, + "learning_rate": 1.0714043255236094e-05, + "loss": 0.3761, + "step": 68500 + }, + { + "epoch": 2.55, + "eval_loss": 0.4795922338962555, + "eval_runtime": 1321.5276, + "eval_samples_per_second": 0.41, + "eval_steps_per_second": 0.41, + "step": 68500 + }, + { + "epoch": 2.57, + "learning_rate": 9.851864240013509e-06, + "loss": 0.3754, + "step": 69000 + }, + { + "epoch": 2.57, + "eval_loss": 0.4789520502090454, + "eval_runtime": 1345.4528, + "eval_samples_per_second": 0.403, + "eval_steps_per_second": 0.403, + "step": 69000 + }, + { + "epoch": 2.59, + "learning_rate": 9.024048790501272e-06, + "loss": 0.3594, + "step": 69500 + }, + { + "epoch": 2.59, + "eval_loss": 0.47866225242614746, + "eval_runtime": 1316.9883, + "eval_samples_per_second": 0.412, + "eval_steps_per_second": 0.412, + "step": 69500 + }, + { + "epoch": 2.61, + "learning_rate": 8.230912461650797e-06, + "loss": 0.3601, + "step": 70000 + }, + { + "epoch": 2.61, + "eval_loss": 0.47838443517684937, + "eval_runtime": 1306.7325, + "eval_samples_per_second": 0.415, + "eval_steps_per_second": 0.415, + "step": 70000 + }, + { + "epoch": 2.63, + "learning_rate": 7.472757589080226e-06, + "loss": 0.3614, + "step": 70500 + }, + { + "epoch": 2.63, + "eval_loss": 0.4780386686325073, + "eval_runtime": 1290.4017, + "eval_samples_per_second": 0.42, + "eval_steps_per_second": 0.42, + "step": 70500 } ], + "logging_steps": 500, "max_steps": 80463, "num_train_epochs": 3, - "total_flos": 1.5701320307350487e+19, + "save_steps": 500, + "total_flos": 1.978819419542102e+19, "trial_name": null, "trial_params": null } diff --git a/checkpoint-70500/training_args.bin b/checkpoint-70500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6 --- /dev/null +++ b/checkpoint-70500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79 +size 4155 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..929a285897b674a9e6f4208e87fdbaf8db10899d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,36 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": null, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "use_fast": true +} diff --git a/training_args.bin b/training_args.bin index 5fa131d335bef0de487e84cca21c03f6e4d05ac0..fcdbc2e5ceda75e1111d82393dc8f31eb77db7e6 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70 -size 4027 +oid sha256:35750ce2c97e67db338d1121db50269062def2ea29de48747dfd43b7a072ee79 +size 4155