mhenrichsen committed
Commit 3513071
1 Parent(s): 3fc9006

Feat(cfg): Add code-llama configs for all sizes (#479)


* configs for all sizes

* update tokenizer type

---------

Co-authored-by: mhenrichsen <some_email@hey.com>

examples/code-llama/13b/lora.yml ADDED
@@ -0,0 +1,67 @@
+ base_model: codellama/CodeLlama-13b-hf
+ base_model_config: codellama/CodeLlama-13b-hf
+ model_type: LlamaForCausalLM
+ tokenizer_type: CodeLlamaTokenizer
+ is_llama_derived_model: true
+
+ load_in_8bit: true
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./lora-out
+
+ sequence_len: 100000
+ sample_packing: true
+
+ adapter: lora
+ lora_model_dir:
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 3
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ eval_steps: 20
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
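
As a quick usage note (a sketch following the launch pattern in the README added at the end of this commit, with the 13b path filled in for this file):

```shell
# Launch pattern from examples/code-llama/README.md, applied to the config above
accelerate launch scripts/finetune.py examples/code-llama/13b/lora.yml
```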
examples/code-llama/13b/qlora.yml ADDED
@@ -0,0 +1,69 @@
+ base_model: codellama/CodeLlama-13b-hf
+ base_model_config: codellama/CodeLlama-13b-hf
+ model_type: LlamaForCausalLM
+ tokenizer_type: CodeLlamaTokenizer
+ is_llama_derived_model: true
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./qlora-out
+
+ adapter: qlora
+ lora_model_dir:
+
+ sequence_len: 100000
+ sample_packing: true
+
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 3
+ optimizer: paged_adamw_32bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ eval_steps: 20
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
examples/code-llama/34b/lora.yml ADDED
@@ -0,0 +1,67 @@
+ base_model: codellama/CodeLlama-34b-hf
+ base_model_config: codellama/CodeLlama-34b-hf
+ model_type: LlamaForCausalLM
+ tokenizer_type: CodeLlamaTokenizer
+ is_llama_derived_model: true
+
+ load_in_8bit: true
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./lora-out
+
+ sequence_len: 100000
+ sample_packing: true
+
+ adapter: lora
+ lora_model_dir:
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 3
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ eval_steps: 20
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
examples/code-llama/34b/qlora.yml ADDED
@@ -0,0 +1,69 @@
+ base_model: codellama/CodeLlama-34b-hf
+ base_model_config: codellama/CodeLlama-34b-hf
+ model_type: LlamaForCausalLM
+ tokenizer_type: CodeLlamaTokenizer
+ is_llama_derived_model: true
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./qlora-out
+
+ adapter: qlora
+ lora_model_dir:
+
+ sequence_len: 100000
+ sample_packing: true
+
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 3
+ optimizer: paged_adamw_32bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ eval_steps: 20
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
examples/code-llama/7b/lora.yml ADDED
@@ -0,0 +1,67 @@
+ base_model: codellama/CodeLlama-7b-hf
+ base_model_config: codellama/CodeLlama-7b-hf
+ model_type: LlamaForCausalLM
+ tokenizer_type: CodeLlamaTokenizer
+ is_llama_derived_model: true
+
+ load_in_8bit: true
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./lora-out
+
+ sequence_len: 100000
+ sample_packing: true
+
+ adapter: lora
+ lora_model_dir:
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 3
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ eval_steps: 20
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
examples/code-llama/7b/qlora.yml ADDED
@@ -0,0 +1,69 @@
+ base_model: codellama/CodeLlama-7b-hf
+ base_model_config: codellama/CodeLlama-7b-hf
+ model_type: LlamaForCausalLM
+ tokenizer_type: CodeLlamaTokenizer
+ is_llama_derived_model: true
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./qlora-out
+
+ adapter: qlora
+ lora_model_dir:
+
+ sequence_len: 100000
+ sample_packing: true
+
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 2
+ num_epochs: 3
+ optimizer: paged_adamw_32bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ eval_steps: 20
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
examples/code-llama/README.md ADDED
@@ -0,0 +1,22 @@
+ # Overview
+
+ These are example CodeLlama configurations for the 7b, 13b and 34b model sizes.
+
+ The 7b variant fits on any 24 GB VRAM GPU and takes up about 17 GB of VRAM during training with qlora and about 20 GB with lora. On an RTX 4090 it trains 3 epochs of the default dataset in about 15 minutes.
+
+ The 13b variant fits on 24 GB of VRAM if you change these settings to the following values (see the combined example after the launch commands below):
+ gradient_accumulation_steps: 2
+ micro_batch_size: 1
+
+ The 34b variant does not fit on 24 GB of VRAM; you will need a GPU with 40+ GB of VRAM that also supports flash attention v2, such as an A6000 or A100.
+
+ ```shell
+ accelerate launch scripts/finetune.py examples/code-llama/[MODEL_SIZE]/qlora.yml
+ ```
+
+ or
+
+ ```shell
+ accelerate launch scripts/finetune.py examples/code-llama/[MODEL_SIZE]/lora.yml
+ ```
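
Combining the 13b note with the launch command, a minimal sketch (the two overridden values are the ones stated above; edit them in the config before launching):

```shell
# Per the note above, to fit 13b on 24 GB of VRAM first set in examples/code-llama/13b/qlora.yml (or lora.yml):
#   gradient_accumulation_steps: 2
#   micro_batch_size: 1
# then launch as usual:
accelerate launch scripts/finetune.py examples/code-llama/13b/qlora.yml
```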