alpindale committed
Commit 1db5c1b
1 Parent(s): 4c42559

Create axolotl/config.yaml

Files changed (1)
  1. axolotl/config.yaml +83 -0
axolotl/config.yaml ADDED
@@ -0,0 +1,83 @@
+ base_model: Qwen/Qwen1.5-32B
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+
+ trust_remote_code: true
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+
+ datasets:
+   - path: u-acc/mimi
+     type: sharegpt
+     conversation: chatml
+   - path: u-acc/sonnetorcasubset
+     type: sharegpt
+     conversation: chatml
+   - path: u-acc/claude_writing
+     type: sharegpt
+     conversation: chatml
+   - path: kalomaze/Opus_Instruct_3k
+     type: sharegpt
+     conversation: chatml
+   - path: kalomaze/Opus_Instruct_25k
+     type: sharegpt
+     conversation: chatml
+ chat_template: chatml
+ dataset_prepared_path:
+ val_set_size: 0.0
+ output_dir: ./magnum-32b-v1
+ default_system_message: You are an assistant that responds to the user.
+ sequence_len: 8192
+ sample_packing: true
+ eval_sample_packing: false
+ pad_to_sequence_len: false
+
+ adapter:
+ lora_model_dir:
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_linear:
+ lora_fan_in_fan_out:
+
+ wandb_project: magnum-32b-v2
+ wandb_entity:
+ wandb_watch:
+ wandb_name: attempt-2
+ wandb_log_model:
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 1
+ num_epochs: 2
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.00001
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: false
+
+ gradient_checkpointing: unsloth
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 30
+ evals_per_epoch:
+ eval_table_size:
+ eval_max_new_tokens:
+ saves_per_epoch: 2
+ debug:
+ deepspeed: deepspeed_configs/zero3_bf16.json
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
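
For reference, each dataset declared with type: sharegpt above is expected to hold conversations in the ShareGPT schema, a list of from/value turns that axolotl then renders with the chatml conversation template. The following is a minimal sketch of one such record, written in YAML for readability; the sample text is illustrative only, not taken from any of the datasets listed in the config.

# Hypothetical ShareGPT-style record; the datasets referenced above are assumed to share this shape.
conversations:
  - from: system
    value: You are an assistant that responds to the user.
  - from: human
    value: Write a short poem about the sea.
  - from: gpt
    value: The tide keeps time against the shore...

With conversation: chatml and chat_template: chatml, each turn is wrapped in im_start/im_end role markers when the prompt is built, and train_on_inputs: false keeps the non-assistant turns masked out of the loss.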