swap batch size for gradient accumulation steps to decouple from num gpu
- README.md +1 -1
- configs/cerebras_1_3B_alpaca.yml +1 -1
- configs/galactica_1_3B.yml +1 -1
- configs/gpt_neox_20b.yml +1 -1
- configs/llama_13B_alpaca.yml +1 -1
- configs/llama_65B_alpaca.yml +1 -1
- configs/llama_7B_4bit.yml +1 -1
- configs/llama_7B_alpaca.yml +1 -1
- configs/llama_7B_jeopardy.yml +1 -1
- configs/pythia_1_2B_alpaca.yml +1 -1
- configs/quickstart.yml +1 -1
- configs/sample.yml +2 -1
- configs/stability_3b.yml +1 -1
- configs/vicuna_13B_4bit_reflect.yml +1 -1
- examples/gptq-lora-7b/config.yml +1 -1
- examples/mpt-7b/config.yml +1 -1
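The change is mechanical across the configs: the fixed batch_size entry is dropped and an explicit gradient_accumulation_steps is added next to micro_batch_size. The sketch below is illustrative Python, not code from this repository (world_size and the helper names are assumptions); it shows why an explicit accumulation setting decouples the config from the GPU count, while a fixed total batch size forces the accumulation steps to be derived from however many GPUs the job happens to run on.

# Illustrative sketch only -- not code from this repository.
# `world_size` (number of GPUs/processes) and these helper names are assumptions.

def derived_grad_accum(batch_size: int, micro_batch_size: int, world_size: int) -> int:
    """Old style: a fixed total batch_size means the accumulation steps depend
    on the GPU count, so the same config behaves differently per machine."""
    return batch_size // (micro_batch_size * world_size)

def effective_batch_size(micro_batch_size: int, grad_accum_steps: int, world_size: int) -> int:
    """New style: gradient_accumulation_steps is set explicitly in the config;
    the effective batch simply scales with the hardware."""
    return micro_batch_size * grad_accum_steps * world_size

if __name__ == "__main__":
    # e.g. configs/quickstart.yml after this change:
    #   micro_batch_size: 1, gradient_accumulation_steps: 1
    for gpus in (1, 2, 8):
        print(gpus, effective_batch_size(1, 1, gpus))  # -> 1, 2, 8
    # versus the old style with a hypothetical fixed batch_size of 8:
    for gpus in (1, 2, 8):
        print(gpus, derived_grad_accum(8, 1, gpus))    # -> 8, 4, 1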
README.md
@@ -265,7 +265,7 @@ wandb_log_model: # 'checkpoint'
 output_dir: ./completed-model
 
 # training hyperparameters
-
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 eval_batch_size: 2
 num_epochs: 3
configs/cerebras_1_3B_alpaca.yml
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-alpaca
-
+gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 5
 learning_rate: 0.0003
configs/galactica_1_3B.yml
@@ -23,7 +23,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-llama-alpaca
-
+gradient_accumulation_steps: 1
 micro_batch_size: 16
 num_epochs: 3
 learning_rate: 0.00003
configs/gpt_neox_20b.yml
@@ -25,7 +25,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./gpt4all-neox-20b
-
+gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 5
 learning_rate: 0.00003
configs/llama_13B_alpaca.yml
@@ -23,7 +23,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./llama-13b-sharegpt
-
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 warmup_steps: 1000
 save_steps:
configs/llama_65B_alpaca.yml
@@ -29,7 +29,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-llama-alpaca
-
+gradient_accumulation_steps: 1
 micro_batch_size: 16
 warmup_steps: 1000
 save_steps:
configs/llama_7B_4bit.yml
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-test
-
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 3
 warmup_steps: 100
configs/llama_7B_alpaca.yml
@@ -28,7 +28,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-llama-alpaca
-
+gradient_accumulation_steps: 1
 micro_batch_size: 16
 num_epochs: 5
 learning_rate: 0.00003
configs/llama_7B_jeopardy.yml
@@ -24,7 +24,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./jeopardy-bot-7b
-
+gradient_accumulation_steps: 2
 micro_batch_size: 1
 num_epochs: 2
 optimizer: adamw_bnb_8bit
configs/pythia_1_2B_alpaca.yml
@@ -28,7 +28,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-alpaca
-
+gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 5
 learning_rate: 0.00001
configs/quickstart.yml
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-test
-
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 warmup_steps: 100
configs/sample.yml
@@ -53,7 +53,8 @@ wandb_log_model:
 # where to save the finsihed model to
 output_dir: ./completed-model
 # training hyperparameters
-
+gradient_accumulation_steps: 1
+batch_size:
 micro_batch_size: 2
 num_epochs: 3
 warmup_steps: 100
configs/stability_3b.yml
@@ -22,7 +22,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./stable-alpaca-3b
-
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
 optimizer: adamw_bnb_8bit
configs/vicuna_13B_4bit_reflect.yml
@@ -30,7 +30,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./lora-reflect
-
+gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 3
 learning_rate: 0.00003
examples/gptq-lora-7b/config.yml
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./llama-7b-lora-int4
-
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit
examples/mpt-7b/config.yml
@@ -24,7 +24,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./mpt-alpaca-7b
-
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit