Evan Griffiths committed on
Commit
6ef46f8
1 Parent(s): 628b754

Add an example config for finetuning a 34B model on a 24GB GPU (#1000)


* Add an example config for finetuning a 34B model on a 24GB GPU

* Remove wandb project

examples/yi-34B-chat/README.md ADDED
@@ -0,0 +1,5 @@
+ # Overview
+
+ This is an example of a Yi-34B-Chat configuration. It demonstrates that it is possible to finetune a 34B model on a GPU with 24GB of VRAM.
+
+ Tested on an RTX 4090 with `python -m axolotl.cli.train examples/yi-34B-chat/qlora.yml`; a single epoch of QLoRA finetuning on the alpaca dataset completes in 47 minutes, using 97% of available VRAM.
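A minimal launch sketch to go with the tested command above. The `accelerate launch` variant follows standard axolotl usage and is an assumption on my part, not part of this commit:

```bash
# Sketch: launch QLoRA finetuning with this example config.
# Assumes axolotl and its dependencies are installed and a single
# 24GB GPU (e.g. an RTX 4090) is visible.
accelerate launch -m axolotl.cli.train examples/yi-34B-chat/qlora.yml

# Single-process form, as used for the RTX 4090 timing above:
python -m axolotl.cli.train examples/yi-34B-chat/qlora.yml
```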
examples/yi-34B-chat/qlora.yml ADDED
@@ -0,0 +1,76 @@
+ base_model: 01-ai/Yi-34B-Chat
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ is_mistral_derived_model: false
+ is_llama_derived_model: true
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+ sequence_len: 1024
+ bf16: true
+ fp16: false
+ tf32: false
+ flash_attention: true
+ special_tokens:
+   bos_token: "<|startoftext|>"
+   eos_token: "<|endoftext|>"
+   unk_token: "<unk>"
+
+ # Data
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+ warmup_steps: 10
+
+ # Iterations
+ num_epochs: 1
+
+ # Evaluation
+ val_set_size: 0.1
+ evals_per_epoch: 5
+ eval_table_size:
+ eval_table_max_new_tokens: 128
+ eval_sample_packing: false
+ eval_batch_size: 1
+
+ # LoRA
+ output_dir: ./qlora-out
+ adapter: qlora
+ lora_model_dir:
+ lora_r: 32
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+ lora_target_modules:
+
+ # Sampling
+ sample_packing: false
+ pad_to_sequence_len: false
+
+ # Batching
+ gradient_accumulation_steps: 4
+ micro_batch_size: 1
+ gradient_checkpointing: true
+
+ # wandb
+ wandb_project:
+
+ # Optimizer
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ # Misc
+ train_on_inputs: false
+ group_by_length: false
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ debug:
+ deepspeed:
+ weight_decay: 0
+ fsdp:
+ fsdp_config:
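A hedged sketch of post-training usage with this config. The adapter is written to `./qlora-out` per `output_dir` above; the `axolotl.cli.inference` and `axolotl.cli.merge_lora` entry points and the `--lora_model_dir` override follow standard axolotl usage and are not part of this commit:

```bash
# Chat with the base model plus the trained QLoRA adapter:
python -m axolotl.cli.inference examples/yi-34B-chat/qlora.yml \
    --lora_model_dir="./qlora-out"

# Merge the adapter into the base weights for standalone serving:
python -m axolotl.cli.merge_lora examples/yi-34B-chat/qlora.yml \
    --lora_model_dir="./qlora-out"
```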