lingchensanwen committed
Commit 1d59286
1 Parent(s): 4f9c29d

Update qlora.yml

Files changed (1)
qlora.yml (+4 −9)
qlora.yml CHANGED
@@ -1,20 +1,19 @@
 base_model: meta-llama/Llama-2-7b-chat-hf
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
-# hub_model_id: llama2-generation

 load_in_8bit: false
 load_in_4bit: true
 strict: false

 datasets:
-  - path: /home/yw23374/axolotl/examples/mistral/data/final_data/upsampled_train.json
+  - path: ../upsampled_train.json
     ds_type: json
     type: alpaca
     split: train

 test_datasets:
-  - path: /home/yw23374/axolotl/examples/mistral/data/final_data/val.json
+  - path: ../val.json
     ds_type: json
     type: alpaca
     split: train
@@ -23,7 +22,7 @@ load_best_model_at_end: False
 early_stopping_patience:
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./qlora-out-llama2-balance-3nd
+output_dir: ./qlora-out-llama2-balance-1st

 adapter: qlora
 lora_model_dir:
@@ -54,7 +53,7 @@ wandb_log_model:

 gradient_accumulation_steps: 2
 micro_batch_size: 1
-num_epochs: 5 #3 before, 5 for balanced
+num_epochs: 5
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0001
@@ -93,7 +92,3 @@ special_tokens:
 bos_token: "<s>"
 eos_token: "</s>"
 unk_token: "<unk>"
-
-# tokens: # these are delimiters
-# - "<|im_start|>"
-# - "<|im_end|>"
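For reference, the net effect of this commit is to replace the absolute /home/yw23374/... dataset paths with relative ones, rename the output directory, and drop two stale comment blocks. Below is a minimal excerpt of the resulting qlora.yml as a sketch; it assumes the two JSON files sit one directory above wherever training is launched, since relative paths are presumably resolved against the working directory:

datasets:
  - path: ../upsampled_train.json   # was /home/yw23374/axolotl/examples/mistral/data/final_data/upsampled_train.json
    ds_type: json
    type: alpaca
    split: train

test_datasets:
  - path: ../val.json               # was /home/yw23374/axolotl/examples/mistral/data/final_data/val.json
    ds_type: json
    type: alpaca
    split: train

output_dir: ./qlora-out-llama2-balance-1st   # renamed from ./qlora-out-llama2-balance-3nd
num_epochs: 5                                # value unchanged; stale comment removed

With a standard axolotl install, training with this config would typically be launched as accelerate launch -m axolotl.cli.train qlora.yml, run from a directory where the ../ paths resolve correctly.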