theblackcat102 commited on
Commit
5f1573c
·
verified ·
1 Parent(s): 4a73acd

Create axolotl/pretrain_test.yaml

Browse files
Files changed (1) hide show
  1. axolotl/pretrain_test.yaml +113 -0
axolotl/pretrain_test.yaml ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
base_model: theblackcat102/whale-v3-base-merged
# optionally might have model_type or tokenizer_type
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
trust_remote_code: true
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name

load_in_8bit: false
load_in_4bit: true
strict: false

adapter: qlora
lora_r: 380
lora_alpha: 380
lora_target_modules: [down_proj,gate_proj,up_proj]
lora_target_linear: false
peft_use_rslora: true

max_steps: 100000
datasets:
  - path: cerebras/SlimPajama-627B
    data_files:
      - train/chunk1/example_train_0.jsonl.zst
      - train/chunk1/example_train_1.jsonl.zst
      - train/chunk1/example_train_2.jsonl.zst
      - train/chunk1/example_train_3.jsonl.zst
      - train/chunk1/example_train_4.jsonl.zst
      - train/chunk1/example_train_5.jsonl.zst
      - train/chunk1/example_train_6.jsonl.zst
      - train/chunk1/example_train_7.jsonl.zst
      - train/chunk1/example_train_8.jsonl.zst
      - train/chunk1/example_train_9.jsonl.zst
      - train/chunk1/example_train_10.jsonl.zst
      - train/chunk1/example_train_11.jsonl.zst
      - train/chunk1/example_train_12.jsonl.zst
      - train/chunk1/example_train_13.jsonl.zst
      - train/chunk1/example_train_14.jsonl.zst
      - train/chunk1/example_train_15.jsonl.zst
      - train/chunk1/example_train_16.jsonl.zst
      - train/chunk1/example_train_17.jsonl.zst
      - train/chunk1/example_train_18.jsonl.zst
      - train/chunk1/example_train_19.jsonl.zst
      - train/chunk1/example_train_20.jsonl.zst
      - train/chunk1/example_train_21.jsonl.zst
      - train/chunk1/example_train_22.jsonl.zst
      - train/chunk1/example_train_23.jsonl.zst
      - train/chunk1/example_train_24.jsonl.zst
      - train/chunk1/example_train_25.jsonl.zst
      - train/chunk1/example_train_26.jsonl.zst
      - train/chunk1/example_train_27.jsonl.zst
      - train/chunk1/example_train_28.jsonl.zst
      - train/chunk1/example_train_29.jsonl.zst
      - train/chunk1/example_train_30.jsonl.zst
      - train/chunk1/example_train_31.jsonl.zst
      - train/chunk1/example_train_32.jsonl.zst
      - train/chunk1/example_train_33.jsonl.zst
      - train/chunk1/example_train_34.jsonl.zst
      - train/chunk1/example_train_35.jsonl.zst
      - train/chunk1/example_train_36.jsonl.zst
      - train/chunk1/example_train_37.jsonl.zst
      - train/chunk1/example_train_38.jsonl.zst
      - train/chunk1/example_train_39.jsonl.zst
      - train/chunk1/example_train_40.jsonl.zst
    split: train
    type: completion
dataset_prepared_path: last_run_mixed
val_set_size: 0.0
output_dir: ./outputs/model-out-mix

sequence_len: 1024
sample_packing: true
# unfrozen_parameters:
# - model.layers.*.mlp.(gate|up|down)_proj.weight$
wandb_project: whale-v3-post-pt
wandb_entity: theblackcat102
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 32
micro_batch_size: 1
num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.00008
max_grad_norm: 10.0
adam_beta2: 0.95

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 50
evals_per_epoch:
eval_table_size:
save_steps: 100
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens: