winglian committed on
Commit 00ac302
1 Parent(s): 9c1af1a

add qwen2-72b fsdp example (#1696)

Files changed (1)
  1. examples/qwen2/qlora-fsdp.yaml +75 -0
examples/qwen2/qlora-fsdp.yaml ADDED
@@ -0,0 +1,75 @@
+ base_model: Qwen/Qwen2-7B
+ trust_remote_code: true
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   - path: tatsu-lab/alpaca
+     type: alpaca
+ dataset_prepared_path:
+ val_set_size: 0.05
+ output_dir: ./outputs/out
+
+ sequence_len: 2048
+ sample_packing: true
+ eval_sample_packing: true
+ pad_to_sequence_len: true
+
+ adapter: qlora
+ lora_model_dir:
+ lora_r: 32
+ lora_alpha: 64
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_name:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 1
+ num_epochs: 4
+ optimizer: adamw_torch
+ lr_scheduler: cosine
+ learning_rate: 0.0002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: true
+
+ gradient_checkpointing: true
+ gradient_checkpointing_kwargs:
+   use_reentrant: false
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ evals_per_epoch: 4
+ saves_per_epoch: 1
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_limit_all_gathers: true
+   fsdp_sync_module_states: true
+   fsdp_offload_params: true
+   fsdp_use_orig_params: false
+   fsdp_cpu_ram_efficient_loading: true
+   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
+   fsdp_transformer_layer_cls_to_wrap: Qwen2DecoderLayer
+   fsdp_state_dict_type: FULL_STATE_DICT
+ special_tokens:
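
A minimal launch sketch for this config, assuming a multi-GPU node with accelerate already configured for FSDP and the file saved at the path added above:

accelerate launch -m axolotl.cli.train examples/qwen2/qlora-fsdp.yaml

The fsdp_config block pairs QLoRA's 4-bit weights with full-shard FSDP: fsdp_offload_params moves sharded parameters to CPU when not in use, and fsdp_cpu_ram_efficient_loading loads the checkpoint once and broadcasts it via fsdp_sync_module_states, which helps keep host RAM within bounds for large Qwen2 checkpoints.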