more pruning
configs/cerebras_1_3B_alpaca.yml
DELETED
@@ -1,40 +0,0 @@
-base_model: cerebras/Cerebras-GPT-1.3B
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-load_in_8bit: true
-datasets:
-  - path: data/alpaca_data_gpt4.jsonl
-    type: alpaca
-  - path: data/vicuna_cleaned.jsonl
-    type: sharegpt
-  - path: data/gpt4-instruct-similarity-0.6-dataset.jsonl
-    type: gpteacher
-  - path: data/roleplay-similarity_0.6-instruct-dataset.jsonl
-    type: gpteacher
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.05
-adapter: lora
-sequence_len: 2048
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - c_attn
-lora_fan_in_fan_out: false
-wandb_project: pythia-1.4b-lora
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-alpaca
-gradient_accumulation_steps: 1
-micro_batch_size: 4
-num_epochs: 5
-learning_rate: 0.0003
-train_on_inputs: false
-group_by_length: false
-bf16: True
-tf32: True
-gradient_checkpointing:
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
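Note: the deleted Cerebras config amounted to an 8-bit LoRA run targeting the fused c_attn projection. A minimal sketch of the equivalent transformers/peft setup, mirroring the hyperparameters above (illustrative only, not axolotl's actual training code):

# Sketch: 8-bit base model + LoRA on c_attn, as in the deleted config.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained(
    "cerebras/Cerebras-GPT-1.3B",
    load_in_8bit=True,   # load_in_8bit: true
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("cerebras/Cerebras-GPT-1.3B")

lora_config = LoraConfig(
    r=8,                 # lora_r: 8
    lora_alpha=16,       # lora_alpha: 16
    lora_dropout=0.05,   # lora_dropout: 0.05
    target_modules=["c_attn"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights train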
configs/galactica_1_3B.yml
DELETED
@@ -1,41 +0,0 @@
-base_model: facebook/galactica-1.3b
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
-load_in_8bit: false
-datasets:
-  - path: tatsu-lab/alpaca
-    type: alpaca
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.1
-adapter:
-lora_model_dir:
-sequence_len: 1024
-max_packed_sequence_len: 1024
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
-  - q_proj
-  - v_proj
-lora_fan_in_fan_out: false
-wandb_project:
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./lora-llama-alpaca
-gradient_accumulation_steps: 1
-micro_batch_size: 16
-num_epochs: 3
-learning_rate: 0.00003
-train_on_inputs: false
-group_by_length: false
-bf16: false
-tf32: false
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-tokens:
-  pad_token: "[PAD]"
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
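Note: this config's tokens: block overrode the tokenizer's special tokens. A hedged sketch of the standard transformers pattern those four settings imply (how the trainer applies them internally may differ):

# Sketch: applying the special-token overrides from the deleted config.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/galactica-1.3b")
model = AutoModelForCausalLM.from_pretrained("facebook/galactica-1.3b")

num_added = tokenizer.add_special_tokens({
    "pad_token": "[PAD]",
    "bos_token": "<s>",
    "eos_token": "</s>",
    "unk_token": "<unk>",
})
if num_added > 0:
    # newly added token ids need embedding rows in the model
    model.resize_token_embeddings(len(tokenizer))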
configs/gpt_neox_20b.yml
DELETED
@@ -1,39 +0,0 @@
-base_model: EleutherAI/gpt-neox-20b
-base_model_ignore_patterns: pytorch*  # prefer safetensors
-model_type: GPTNeoXForCausalLM
-tokenizer_type: AutoTokenizer
-load_in_8bit: true
-datasets:
-  - path: nomic-ai/gpt4all-j-prompt-generations
-    type: alpaca
-    shards: 4
-    shards_index: 0
-dataset_prepared_path: last_run_prepared
-val_set_size: 0.05
-adapter: lora
-lora_model_dir:
-sequence_len: 2048
-max_packed_sequence_len: 2048
-lora_r: 8
-lora_alpha: 32
-lora_dropout: 0.05
-lora_target_modules:
-  - query_key_value
-lora_fan_in_fan_out: true  # pythia/GPTNeoX lora specific
-wandb_project: gpt4all-neox-20b
-wandb_watch:
-wandb_run_id:
-wandb_log_model:
-output_dir: ./gpt4all-neox-20b
-gradient_accumulation_steps: 1
-micro_batch_size: 4
-num_epochs: 5
-learning_rate: 0.00003
-lr_scheduler: one_cycle
-train_on_inputs: false
-group_by_length: false
-bf16: True
-tf32: True
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
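Note: lora_fan_in_fan_out: true is flagged in the file itself as pythia/GPTNeoX-specific; in peft terms the flag tells LoRA that the target layer stores its weight as (fan_in, fan_out) rather than the usual nn.Linear layout. A sketch of the corresponding peft config (an illustration, not the project's loader):

# Sketch: LoRA over GPTNeoX's fused attention projection.
from peft import LoraConfig

neox_lora = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],  # single fused q/k/v projection
    fan_in_fan_out=True,  # weight stored as (fan_in, fan_out)
    task_type="CAUSAL_LM",
)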
configs/stability_3b.yml β examples/cerebras/qlora.yml
RENAMED
@@ -1,38 +1,45 @@
-base_model:
-base_model_config:
+base_model: cerebras/Cerebras-GPT-1.3B
+base_model_config: cerebras/Cerebras-GPT-1.3B
 load_in_8bit: false
+load_in_4bit: true
+strict: false
+push_dataset_to_hub:
 datasets:
-  - path:
+  - path: teknium/GPT4-LLM-Cleaned
     type: alpaca
 dataset_prepared_path: last_run_prepared
-val_set_size: 0.
-adapter:
+val_set_size: 0.01
+adapter: qlora
 lora_model_dir:
-sequence_len:
-max_packed_sequence_len:
-lora_r:
-lora_alpha:
+sequence_len: 2048
+max_packed_sequence_len: 2048
+lora_r: 16
+lora_alpha: 32
 lora_dropout: 0.05
 lora_target_modules:
-  -
-  -
-
-
+  - c_fc
+  - c_attn
+  - c_proj
+lora_target_linear:
+lora_fan_in_fan_out:
+wandb_project:
 wandb_watch:
 wandb_run_id:
 wandb_log_model:
-output_dir: ./
-
-micro_batch_size:
-num_epochs:
-optimizer:
+output_dir: ./qlora-out
+batch_size: 4
+micro_batch_size: 4
+num_epochs: 2
+optimizer: paged_adamw_8bit
 torchdistx_path:
 lr_scheduler: cosine
-learning_rate: 0.
+learning_rate: 0.0002
 train_on_inputs: false
-group_by_length:
+group_by_length: true
 bf16: true
+fp16: false
 tf32: true
+gradient_checkpointing: true
 early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
@@ -41,16 +48,13 @@ xformers_attention: true
 flash_attention:
 gptq_groupsize:
 gptq_model_v1:
-warmup_steps:
-eval_steps:
-save_steps:
+warmup_steps: 10
+eval_steps: 20
+save_steps:
 debug:
 deepspeed:
-weight_decay: 0.
+weight_decay: 0.1
 fsdp:
 fsdp_config:
-
-
-# bos_token: "<s>"
-# eos_token: "</s>"
-# unk_token: "<unk>"
+special_tokens:
+  pad_token: "<|endoftext|>"
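Note: the renamed example switches the recipe to QLoRA: a 4-bit base model, paged 8-bit AdamW, and LoRA over all three GPT-2-style projections. A minimal sketch of what those settings translate to in transformers/peft; the bnb_4bit_* details are assumed QLoRA defaults, since the YAML only says load_in_4bit: true:

# Sketch: QLoRA-style setup matching examples/cerebras/qlora.yml.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load_in_4bit: true
    bnb_4bit_quant_type="nf4",              # assumed; not in the YAML
    bnb_4bit_use_double_quant=True,         # assumed; not in the YAML
    bnb_4bit_compute_dtype=torch.bfloat16,  # bf16: true
)
model = AutoModelForCausalLM.from_pretrained(
    "cerebras/Cerebras-GPT-1.3B",
    quantization_config=bnb_config,
    device_map="auto",
)
model = prepare_model_for_kbit_training(model)

qlora = LoraConfig(
    r=16,                # lora_r: 16
    lora_alpha=32,       # lora_alpha: 32
    lora_dropout=0.05,
    target_modules=["c_fc", "c_attn", "c_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, qlora)

In a Trainer-based run, optimizer: paged_adamw_8bit, lr_scheduler: cosine, and learning_rate: 0.0002 would correspond to TrainingArguments(optim="paged_adamw_8bit", lr_scheduler_type="cosine", learning_rate=2e-4).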