winglian committed
Commit 02c5983 · 1 Parent(s): 3f9c953

push up redpajama 3b example

examples/redpajama/README.md ADDED
@@ -0,0 +1,6 @@
+ # RedPajama 3B preview release
+
+ ```shell
+ accelerate launch scripts/finetune.py examples/redpajama/config-3b.yml
+
+ ```
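
The README's launch command assumes `accelerate` is already installed and configured for the local hardware. A minimal first-time setup sketch — only the `examples/redpajama/config-3b.yml` path comes from this commit; the rest is standard Hugging Face `accelerate` CLI usage:

```shell
# One-time setup: install accelerate and answer its interactive
# prompts (number of machines/GPUs, mixed precision, etc.).
pip install accelerate
accelerate config

# Then launch fine-tuning with the config added in this commit.
accelerate launch scripts/finetune.py examples/redpajama/config-3b.yml
```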
examples/redpajama/config-3b.yml ADDED
@@ -0,0 +1,59 @@
+ base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
+ base_model_config: togethercomputer/RedPajama-INCITE-Chat-3B-v1
+ model_type: GPTNeoXForCausalLM
+ tokenizer_type: GPTNeoXTokenizer
+ trust_remote_code:
+ load_in_8bit: false
+ datasets:
+   - path: vicgalle/alpaca-gpt4
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.02
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ lora_r: 8
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - q_proj
+   - v_proj
+ lora_fan_in_fan_out: false
+ wandb_project: redpajama-alpaca-3b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model: checkpoint
+ output_dir: ./redpajama-alpaca-3b
+ batch_size: 4
+ micro_batch_size: 1
+ num_epochs: 3
+ optimizer: adamw_bnb_8bit
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.0000002
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ tf32: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 5
+ xformers_attention:
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 20
+ eval_steps: 110
+ save_steps: 660
+ debug:
+ deepspeed:
+ weight_decay: 0.0001
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   pad_token: "<|padding|>"
+   bos_token: "<|endoftext|>"
+   eos_token: "<|endoftext|>"
+   unk_token: "<|endoftext|>"
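
Once training finishes, weights land under `output_dir` (`./redpajama-alpaca-3b` above, with intermediate checkpoints every `save_steps`). A hedged sketch of a quick generation check, assuming the final model was saved to that directory: the model class follows the config's `model_type`, the tokenizer is loaded via `AutoTokenizer`, and the prompt layout loosely mirrors the alpaca format named under `datasets` (the prompt text itself is illustrative, not from the commit):

```shell
python - <<'EOF'
from transformers import AutoTokenizer, GPTNeoXForCausalLM

# Path follows output_dir in the config above; assumes a full
# (non-LoRA) save, since `adapter:` is left empty in this config.
model = GPTNeoXForCausalLM.from_pretrained("./redpajama-alpaca-3b")
tokenizer = AutoTokenizer.from_pretrained("./redpajama-alpaca-3b")

# Illustrative alpaca-style prompt; adjust to the exact template used.
prompt = "### Instruction:\nName three uses of a paperclip.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
EOF
```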