update outputs path so that we can mount workspace to /workspace/data (#1623)
* update outputs path so that we can mount workspace to /workspace/data
* fix ln order
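The second commit ("fix ln order") corrects the `ln -s` argument order: the existing target directory comes first, the link name second. A minimal sketch of the layout the Dockerfile change below sets up (paths taken from this PR):

    # create the artifacts dir on the mounted volume, then point the repo's outputs dir at it
    mkdir -p /workspace/data/axolotl-artifacts
    # ln -sf TARGET LINK_NAME: target first, link name second
    ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs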
- docker/Dockerfile-cloud +3 -1
- examples/cerebras/btlm-ft.yml +1 -1
- examples/cerebras/qlora.yml +1 -1
- examples/code-llama/13b/lora.yml +1 -1
- examples/code-llama/13b/qlora.yml +1 -1
- examples/code-llama/34b/lora.yml +1 -1
- examples/code-llama/34b/qlora.yml +1 -1
- examples/code-llama/7b/lora.yml +1 -1
- examples/code-llama/7b/qlora.yml +1 -1
- examples/colab-notebooks/colab-axolotl-example.ipynb +1 -1
- examples/dbrx/16bit-lora.yaml +1 -1
- examples/dbrx/8bit-lora.yaml +1 -1
- examples/dbrx/fft-ds-zero3.yaml +1 -1
- examples/falcon/config-7b-lora.yml +1 -1
- examples/falcon/config-7b-qlora.yml +1 -1
- examples/falcon/config-7b.yml +1 -1
- examples/gemma/qlora.yml +1 -1
- examples/gptj/qlora.yml +1 -1
- examples/jamba/qlora.yaml +1 -1
- examples/jamba/qlora_deepspeed.yaml +1 -1
- examples/jeopardy-bot/config.yml +1 -1
- examples/llama-2/fft_optimized.yml +1 -1
- examples/llama-2/gptq-lora.yml +1 -1
- examples/llama-2/lisa.yml +1 -1
- examples/llama-2/loftq.yml +1 -1
- examples/llama-2/lora.yml +1 -1
- examples/llama-2/qlora-fsdp.yml +1 -1
- examples/llama-2/qlora.yml +1 -1
- examples/llama-2/relora.yml +1 -1
- examples/llama-3/fft-8b.yaml +1 -1
- examples/llama-3/lora-8b.yml +1 -1
- examples/llama-3/qlora-fsdp-70b.yaml +1 -1
- examples/llama-3/qlora.yml +1 -1
- examples/mamba/config.yml +1 -1
- examples/mistral/bigstral-ds-zero3.yaml +1 -1
- examples/mistral/config.yml +1 -1
- examples/mistral/lora-mps.yml +1 -1
- examples/mistral/lora.yml +1 -1
- examples/mistral/mistral-qlora-fsdp.yml +1 -1
- examples/mistral/mistral-qlora-orpo.yml +1 -1
- examples/mistral/mixtral-8x22b-qlora-fsdp.yml +1 -1
- examples/mistral/mixtral-qlora-fsdp.yml +1 -1
- examples/mistral/mixtral.yml +1 -1
- examples/mistral/mixtral_22.yml +1 -1
- examples/mistral/qlora.yml +1 -1
- examples/mpt-7b/config.yml +1 -1
- examples/openllama-3b/config.yml +1 -1
- examples/openllama-3b/lora.yml +1 -1
- examples/openllama-3b/qlora.yml +1 -1
- examples/phi/phi-ft.yml +1 -1
docker/Dockerfile-cloud CHANGED
@@ -21,7 +21,9 @@ RUN apt install --yes --no-install-recommends openssh-server tmux && \
     printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
     printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
     chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
-    chmod +x /root/cloud-entrypoint.sh
+    chmod +x /root/cloud-entrypoint.sh && \
+    mkdir -p /workspace/data/axolotl-artifacts && \
+    ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs

 ENTRYPOINT ["/root/cloud-entrypoint.sh"]
 CMD ["sleep", "infinity"]

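With the symlink in place, anything the trainer writes under /workspace/axolotl/outputs lands in /workspace/data/axolotl-artifacts, so mounting a volume at /workspace/data keeps checkpoints off the container filesystem. A hedged sketch of how the cloud image might be run (the image name and volume name are illustrative, not part of this PR):

    # mount a named volume (or host path) at /workspace/data so outputs persist across container restarts
    docker run --gpus all -v axolotl-data:/workspace/data -it <axolotl-cloud-image>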
examples/cerebras/btlm-ft.yml CHANGED
@@ -38,7 +38,7 @@ wandb_watch:
 wandb_name:
 wandb_log_model:

-output_dir: btlm-out
+output_dir: ./outputs/btlm-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1

examples/cerebras/qlora.yml CHANGED
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 batch_size: 4
 micro_batch_size: 4
 num_epochs: 2

examples/code-llama/13b/lora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true

examples/code-llama/13b/qlora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:

examples/code-llama/34b/lora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true

examples/code-llama/34b/qlora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:

examples/code-llama/7b/lora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true

examples/code-llama/7b/qlora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:

examples/colab-notebooks/colab-axolotl-example.ipynb CHANGED
@@ -84,7 +84,7 @@
 " type: alpaca\n",
 "dataset_prepared_path:\n",
 "val_set_size: 0.05\n",
-"output_dir: ./qlora-out\n",
+"output_dir: ./outputs/qlora-out\n",
 "\n",
 "adapter: qlora\n",
 "lora_model_dir:\n",

examples/dbrx/16bit-lora.yaml CHANGED
@@ -10,7 +10,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false

examples/dbrx/8bit-lora.yaml CHANGED
@@ -10,7 +10,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false

examples/dbrx/fft-ds-zero3.yaml CHANGED
@@ -10,7 +10,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false

examples/falcon/config-7b-lora.yml CHANGED
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4

examples/falcon/config-7b-qlora.yml CHANGED
@@ -42,7 +42,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 # QLoRA paper Table 9
 # - 16 for 7b & 13b

examples/falcon/config-7b.yml CHANGED
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4

examples/gemma/qlora.yml CHANGED
@@ -12,7 +12,7 @@ datasets:
 - path: mhenrichsen/alpaca_2k_test
 type: alpaca
 val_set_size: 0.1
-output_dir: ./out
+output_dir: ./outputs/out

 adapter: qlora
 lora_r: 32

examples/gptj/qlora.yml CHANGED
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 2
 micro_batch_size: 2
 num_epochs: 2

examples/jamba/qlora.yaml CHANGED
@@ -10,7 +10,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: false

examples/jamba/qlora_deepspeed.yaml CHANGED
@@ -10,7 +10,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: false

examples/jeopardy-bot/config.yml CHANGED
@@ -21,7 +21,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./jeopardy-bot-7b
+output_dir: ./outputs/jeopardy-bot-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4

examples/llama-2/fft_optimized.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: true

examples/llama-2/gptq-lora.yml CHANGED
@@ -33,7 +33,7 @@ wandb_project:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./model-out
+output_dir: ./outputs/model-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4

examples/llama-2/lisa.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./lisa-out
+output_dir: ./outputs/lisa-out

 sequence_len: 4096
 sample_packing: true

examples/llama-2/loftq.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true

examples/llama-2/lora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true

examples/llama-2/qlora-fsdp.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:

examples/llama-2/qlora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:

examples/llama-2/relora.yml CHANGED
@@ -12,7 +12,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./relora-out
+output_dir: ./outputs/relora-out

 adapter: qlora
 lora_model_dir:

examples/llama-3/fft-8b.yaml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8192
 sample_packing: true

examples/llama-3/lora-8b.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true

examples/llama-3/qlora-fsdp-70b.yaml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out/qlora-llama3-70b
+output_dir: ./outputs/out/qlora-llama3-70b

 adapter: qlora
 lora_model_dir:

examples/llama-3/qlora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:

examples/mamba/config.yml CHANGED
@@ -12,7 +12,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 2048
 sample_packing: false

examples/mistral/bigstral-ds-zero3.yaml CHANGED
@@ -23,7 +23,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 2048
 sample_packing: true

examples/mistral/config.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8192
 sample_packing: true

examples/mistral/lora-mps.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 eval_sample_packing: false

 adapter: lora

examples/mistral/lora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 adapter: lora
 lora_model_dir:

examples/mistral/mistral-qlora-fsdp.yml CHANGED
@@ -12,7 +12,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
 output_router_logits: true

examples/mistral/mistral-qlora-orpo.yml CHANGED
@@ -16,7 +16,7 @@ datasets:
 type: chat_template.argilla
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./mistral-qlora-orpo-out
+output_dir: ./outputs/mistral-qlora-orpo-out

 adapter: qlora
 lora_model_dir:

examples/mistral/mixtral-8x22b-qlora-fsdp.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
 output_router_logits: true

examples/mistral/mixtral-qlora-fsdp.yml CHANGED
@@ -12,7 +12,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
 output_router_logits: true

examples/mistral/mixtral.yml CHANGED
@@ -12,7 +12,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 ## You can optionally freeze the entire model and unfreeze a subset of parameters
 unfrozen_parameters:

examples/mistral/mixtral_22.yml CHANGED
@@ -21,7 +21,7 @@ model_config:
 datasets:
 - path: yahma/alpaca-cleaned
 type: alpaca
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8000
 sample_packing: true

examples/mistral/qlora.yml CHANGED
@@ -11,7 +11,7 @@ datasets:
 type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:

examples/mpt-7b/config.yml CHANGED
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./mpt-alpaca-7b
+output_dir: ./outputs/mpt-alpaca-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4

examples/openllama-3b/config.yml CHANGED
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./openllama-out
+output_dir: ./outputs/openllama-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4

examples/openllama-3b/lora.yml CHANGED
@@ -31,7 +31,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4

examples/openllama-3b/qlora.yml CHANGED
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4

examples/phi/phi-ft.yml CHANGED
@@ -12,7 +12,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out

 sequence_len: 2048
 sample_packing: true

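After these changes every example writes to a path under ./outputs/, which the cloud image resolves to the mounted volume. A hedged usage sketch with one of the updated configs (the accelerate-based launch is the usual axolotl invocation; adjust to your environment):

    # train with an updated example config; checkpoints land in ./outputs/lora-out,
    # i.e. /workspace/data/axolotl-artifacts/lora-out inside the cloud container
    accelerate launch -m axolotl.cli.train examples/llama-3/lora-8b.yml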