Commit 1ec8159 by winglian (1 parent: e099f79)

Create configs/phi-oo.yml

Files changed (1):
  1. configs/phi-oo.yml +83 -0
configs/phi-oo.yml ADDED
@@ -0,0 +1,83 @@
+ base_model: microsoft/phi-1_5
+ base_model_config: microsoft/phi-1_5
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+ is_llama_derived_model: false
+ trust_remote_code: true
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: openaccess-ai-collective/oo-gpt4-filtered
+     type: alpaca_w_system.load_open_orca_chatml
+     data_files:
+       - 1M-GPT4-Augmented-filtered-gt10.parquet
+
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ output_dir: ./phi-oo-out
+ hub_model_id: Open-Orca/oo-phi-1_5
+
+ sequence_len: 2048
+ sample_packing: false
+ pad_to_sequence_len:
+
+ adapter:
+ lora_model_dir:
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_linear:
+ lora_fan_in_fan_out:
+
+ wandb_project: phi-oo
+ wandb_entity: open-orca
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 1
+ micro_batch_size: 3
+ num_epochs: 5
+ optimizer: adamw_torch
+ adam_beta2: 0.95
+ adam_epsilon: 0.00001
+ max_grad_norm: 1.0
+ lr_scheduler: cosine
+ learning_rate: 0.000003
+
+ train_on_inputs: false
+ group_by_length: true
+ bf16: true
+ fp16: false
+ tf32: true
+
+ gradient_checkpointing:
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention:
+
+ warmup_steps: 1000
+ eval_steps: 0.02
+ save_steps: 0.10
+ do_bench_eval: true
+ bench_dataset: "pharaouk/dharma-1/dharma_1_full.json"
+ debug:
+ deepspeed:
+ weight_decay: 0.1
+ fsdp:
+ fsdp_config:
+ resize_token_embeddings_to_32x: true
+ special_tokens:
+   bos_token: "<|endoftext|>"
+   eos_token: "<|im_end|>"
+   unk_token: "<|endoftext|>"
+   pad_token: "<|endoftext|>"
+ tokens:
+   - "<|im_start|>"
+   - "<|im_end|>"