Text Generation · Transformers · PyTorch · English · llama · text-generation-inference · Inference Endpoints
winglian committed on
Commit
1aa55d6
1 Parent(s): 5111db7

commit 1st epoch(0.96) of training with wizardlm data

README.md CHANGED
@@ -14,7 +14,8 @@ pipeline_tag: text-generation
 
 - `vicgalle/alpaca-gpt4` 1 epoch, learning rate 3e-5 https://wandb.ai/wing-lian/wizard-vicuna-gpt4/overview
 - `deepspeed scripts/finetune.py configs/axolotl/wizard-vicuna-13b-step1.yml --deepspeed configs/ds_config.json --num_epochs 2 --warmup_steps 46 --logging_steps 1 --save_steps 23`
-- `wizardlm` TBD
+- `wizardlm` https://wandb.ai/wing-lian/wizard-vicuna-gpt4/runs/4y38knw4
+- `deepspeed scripts/finetune.py configs/axolotl/wizard-vicuna-13b-step2.yml --deepspeed configs/ds_config-step2.json --num_epochs 2 --logging_steps 1`
 - `vicuna` TBD
 
-<pre>Brought to you by the Freedom AI Collective</pre>
+<pre>Brought to you by the Freedom AI Collective</pre>
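For a quick sanity check of the weights produced by this step, a short generation pass with 🤗 Transformers is usually enough. The sketch below is illustrative only and not part of this commit; the checkpoint path and the Alpaca-style prompt are assumptions taken from `output_dir` and the `type: alpaca` dataset entry in the config added below.

```python
# Minimal sketch: load the step-2 checkpoint and generate a short completion.
# `model_path` is a placeholder; point it at the trained output directory
# (./wizard-lm-out in the YAML below) or at the Hub repo for this model.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_path = "./wizard-lm-out"  # assumed local path, not guaranteed by this commit
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.bfloat16, device_map="auto"  # device_map needs accelerate
)

# Alpaca-style prompt, assumed from the `type: alpaca` dataset entry.
prompt = "### Instruction:\nSummarize what evol-instruct data is.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```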
configs/axolotl/wizard-vicuna-13b-step2.yml ADDED
@@ -0,0 +1,78 @@
+# base_model: huggyllama/llama-13b
+# base_model_config: huggyllama/llama-13b
+base_model: /workspace/llama-13b-alpaca-wizard-vicuna/
+base_model_config: huggyllama/llama-13b
+model_type: LlamaForCausalLM
+tokenizer_type: LlamaTokenizer
+load_in_8bit: false
+datasets:
+  # - path: vicgalle/alpaca-gpt4
+  #   type: alpaca
+  # - path: anon8231489123/ShareGPT_Vicuna_unfiltered
+  #   data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json
+  #   type: sharegpt
+  - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
+    type: alpaca
+dataset_prepared_path: data/last_run_prepared
+val_set_size: 0.04
+adapter:
+lora_model_dir:
+sequence_len: 2048
+max_packed_sequence_len: 2048
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - q_proj
+  - v_proj
+  # - k_proj
+  # - o_proj
+lora_fan_in_fan_out: false
+wandb_project:
+wandb_watch:
+wandb_run_id:
+wandb_log_model: checkpoint
+output_dir: ./wizard-lm-out
+batch_size: 128
+micro_batch_size: 1
+num_epochs: 2
+warmup_steps: 117
+logging_steps:
+learning_rate: 0.000003
+optimizer: adamw_torch
+torchdistx_path:
+lr_scheduler: one_cycle
+log_sweep_min_lr: 2e-6
+log_sweep_max_lr: 1e-4
+train_on_inputs: false
+group_by_length: false
+bf16: true
+tf32: true
+gradient_checkpointing:
+early_stopping_patience:
+resume_from_checkpoint:
+auto_resume_from_checkpoints:
+local_rank:
+load_4bit:
+xformers_attention:
+flash_attention: true
+gptq_groupsize:
+gptq_model_v1:
+save_steps: 56
+eval_steps: 14
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+fsdp_transformer_layer_cls_to_wrap:
+fsdp_min_num_params: 2000
+fsdp_backward_prefetch:
+  - backward_pre
+limit_all_gathers: false
+special_tokens:
+  pad_token: "[PAD]"
+  bos_token: "<s>"
+  eos_token: "</s>"
+  unk_token: "<unk>"
+
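Outside of axolotl, the `special_tokens` block above corresponds to a standard tokenizer update plus an embedding resize (needed because `[PAD]` is new to the LLaMA vocabulary). A minimal sketch, assuming the base model named in the config:

```python
# Sketch: apply the special_tokens section of the YAML by hand.
from transformers import LlamaForCausalLM, LlamaTokenizer

base = "huggyllama/llama-13b"  # base_model_config from the YAML
tokenizer = LlamaTokenizer.from_pretrained(base)
model = LlamaForCausalLM.from_pretrained(base)

# Mirror the special_tokens mapping from the config.
tokenizer.add_special_tokens({
    "pad_token": "[PAD]",
    "bos_token": "<s>",
    "eos_token": "</s>",
    "unk_token": "<unk>",
})
# Grow the embedding matrix so the new [PAD] id has a row.
model.resize_token_embeddings(len(tokenizer))
```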
configs/ds_config-step2.json ADDED
@@ -0,0 +1,58 @@
+{
+  "zero_optimization": {
+    "stage": 3,
+    "offload_optimizer": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "offload_param": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "overlap_comm": true,
+    "contiguous_gradients": true,
+    "sub_group_size": 0,
+    "reduce_bucket_size": "auto",
+    "stage3_prefetch_bucket_size": "auto",
+    "stage3_param_persistence_threshold": "auto",
+    "stage3_max_live_parameters": 0,
+    "stage3_max_reuse_distance": 0,
+    "stage3_gather_16bit_weights_on_model_save": true
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "fp16": {
+    "enabled": "auto",
+    "auto_cast": false,
+    "loss_scale": 0,
+    "initial_scale_power": 32,
+    "loss_scale_window": 1000,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "optimizer": {
+    "type": "AdamW",
+    "params": {
+      "lr": "auto",
+      "betas": [
+        0.9,
+        0.999
+      ],
+      "eps": 1e-8,
+      "weight_decay": 0
+    }
+  },
+  "scheduler": {
+    "type": "OneCycle",
+    "params": {
+      "cycle_min_lr": 0.0000003,
+      "cycle_max_lr": 0.000003,
+      "cycle_first_step_size": 117
+    }
+  },
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "wall_clock_breakdown": false
+}
+
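This is a ZeRO stage-3 setup with optimizer state and parameters offloaded to CPU, bf16/fp16 left on `"auto"`, and a OneCycle schedule sweeping from 3e-7 to 3e-6 over the first 117 steps, matching `warmup_steps: 117` and `learning_rate: 0.000003` in the YAML. When launched through the `deepspeed` command in the README, the 🤗 Trainer integration fills the `"auto"` fields from its own arguments; a rough sketch of that wiring (argument values are illustrative, the real entry point is `scripts/finetune.py`):

```python
# Sketch: how a DeepSpeed JSON like ds_config-step2.json is typically handed
# to the Hugging Face Trainer; "auto" fields resolve from these arguments.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./wizard-lm-out",    # output_dir in the YAML
    per_device_train_batch_size=1,   # micro_batch_size: 1
    num_train_epochs=2,              # num_epochs: 2
    learning_rate=3e-6,              # learning_rate: 0.000003
    bf16=True,                       # resolves "bf16": {"enabled": "auto"}
    tf32=True,
    deepspeed="configs/ds_config-step2.json",
)
```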
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:889db4f7bfe042df8a8a31be88256992bfb30eece88dca60fadaa83810bf7b13
+oid sha256:1a0347a171523a89c7faa94c48cdc17284a8dedeb2505c0b1bc4bf7189e33b26
 size 26031868013
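Only the Git LFS pointer changes in this commit: the checkpoint keeps the same size (26,031,868,013 bytes) but gets a new content hash. A small sketch for confirming that a locally downloaded `pytorch_model.bin` matches this revision (the local path is an assumption):

```python
# Sketch: verify a downloaded pytorch_model.bin against the LFS oid above.
import hashlib
import os

EXPECTED_SHA256 = "1a0347a171523a89c7faa94c48cdc17284a8dedeb2505c0b1bc4bf7189e33b26"
EXPECTED_SIZE = 26031868013

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so the ~26 GB checkpoint never sits in RAM."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

path = "pytorch_model.bin"  # assumed local download location
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert sha256_of(path) == EXPECTED_SHA256, "sha256 mismatch"
print("checkpoint matches this commit")
```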