winglian Nanobit committed on
Commit
782b6a4
1 Parent(s): eaaeefc

set fp16 to false if bf16, update bf16: auto in example YAMLs (#1122) [skip ci]

* set fp16 to false if bf16, update bf16: auto in example YAMLs

* unset fp16 so that it falls back properly if bf16 isn't available

* Update README.md [skip-ci]

Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>

* test that bf16 disables fp16

---------

Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>

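In short: `bf16: auto` turns bf16 on only when the GPU supports it, an unset `fp16` falls back to fp16 AMP when that detection fails, and an active bf16 always switches fp16 off. A minimal sketch of that resolution order, written as a hypothetical standalone helper (`resolve_precision` is not part of this commit):

```python
def resolve_precision(bf16, fp16, bf16_supported):
    """Sketch of the fallback rules this commit adds to normalize_config."""
    if bf16 == "auto":
        bf16 = bf16_supported
        if not bf16 and fp16 is None:
            # fp16 left unset in the YAML: fall back to fp16 AMP when bf16 is unavailable
            fp16 = True
    if bf16:
        # bf16 and fp16 mixed precision are mutually exclusive; bf16 wins
        fp16 = False
    return bool(bf16), bool(fp16)


# Expected outcomes for the updated example YAMLs (bf16: auto, fp16 unset):
assert resolve_precision("auto", None, bf16_supported=True) == (True, False)     # Ampere or newer: bf16
assert resolve_precision("auto", None, bf16_supported=False) == (False, True)    # older GPU: fp16
assert resolve_precision("auto", False, bf16_supported=False) == (False, False)  # fp16: false forces fp32
```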
README.md CHANGED
@@ -464,8 +464,8 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
  ```yaml
  load_in_4bit: true
  load_in_8bit: true
- bf16: true # require >=ampere
- fp16: true
+ bf16: auto # require >=ampere, auto will detect if your GPU supports this and choose automatically.
+ fp16: # leave empty to use fp16 when bf16 is 'auto'. set to false if you want to fallback to fp32
  tf32: true # require >=ampere
  bfloat16: true # require >=ampere, use instead of bf16 when you don't want AMP (automatic mixed precision)
  float16: true # use instead of fp16 when you don't want AMP
examples/cerebras/btlm-ft.yml CHANGED
@@ -53,8 +53,8 @@ lr_quadratic_warmup: true
  learning_rate: 0.000085
  train_on_inputs: true
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true

  gradient_checkpointing: false
examples/cerebras/qlora.yml CHANGED
@@ -36,8 +36,8 @@ lr_scheduler: cosine
  learning_rate: 0.0002
  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true
  gradient_checkpointing: true
  early_stopping_patience:
examples/code-llama/13b/lora.yml CHANGED
@@ -41,8 +41,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/code-llama/13b/qlora.yml CHANGED
@@ -43,8 +43,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/code-llama/34b/lora.yml CHANGED
@@ -41,8 +41,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/code-llama/34b/qlora.yml CHANGED
@@ -43,8 +43,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/code-llama/7b/lora.yml CHANGED
@@ -41,8 +41,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/code-llama/7b/qlora.yml CHANGED
@@ -43,8 +43,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/falcon/config-7b-lora.yml CHANGED
@@ -38,8 +38,8 @@ lr_scheduler: cosine
  learning_rate: 0.00003
  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true
  gradient_checkpointing: true
  early_stopping_patience:
examples/falcon/config-7b-qlora.yml CHANGED
@@ -64,8 +64,8 @@ lr_scheduler: cosine
  learning_rate: 0.0002
  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true
  gradient_checkpointing: true
  # stop training after this many evaluation losses have increased in a row
examples/falcon/config-7b.yml CHANGED
@@ -38,8 +38,8 @@ lr_scheduler: cosine
  learning_rate: 0.00003
  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true
  gradient_checkpointing: true
  early_stopping_patience:
examples/gptj/qlora.yml CHANGED
@@ -33,8 +33,8 @@ lr_scheduler: cosine
  learning_rate: 0.0001
  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true
  gradient_checkpointing: true
  early_stopping_patience:
examples/jeopardy-bot/config.yml CHANGED
@@ -31,7 +31,7 @@ lr_scheduler: cosine
  learning_rate: 0.00003
  train_on_inputs: false
  group_by_length: false
- bf16: true
+ bf16: auto
  tf32: true
  early_stopping_patience:
  resume_from_checkpoint:
examples/llama-2/fft_optimized.yml CHANGED
@@ -41,8 +41,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/llama-2/lora.yml CHANGED
@@ -41,8 +41,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/llama-2/qlora.yml CHANGED
@@ -43,8 +43,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/llama-2/relora.yml CHANGED
@@ -47,8 +47,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/mamba/config.yml CHANGED
@@ -34,8 +34,8 @@ learning_rate: 5e-5
  train_on_inputs: false
  group_by_length: true

- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true

  gradient_checkpointing: false
examples/mistral/config.yml CHANGED
@@ -34,8 +34,8 @@ learning_rate: 0.000005

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/mistral/mixtral.yml CHANGED
@@ -63,8 +63,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/mistral/qlora.yml CHANGED
@@ -50,8 +50,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/mpt-7b/config.yml CHANGED
@@ -33,7 +33,7 @@ lr_scheduler: cosine
  learning_rate: 0.0000002
  train_on_inputs: false
  group_by_length: false
- bf16: true
+ bf16: auto
  tf32: true
  early_stopping_patience:
  resume_from_checkpoint:
examples/phi/phi-ft.yml CHANGED
@@ -46,8 +46,8 @@ learning_rate: 0.000003

  train_on_inputs: false
  group_by_length: true
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true

  gradient_checkpointing:
examples/phi/phi-qlora.yml CHANGED
@@ -46,8 +46,8 @@ learning_rate: 0.000003

  train_on_inputs: false
  group_by_length: true
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true

  gradient_checkpointing:
examples/phi/phi2-ft.yml CHANGED
@@ -49,8 +49,8 @@ learning_rate: 1e-5

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: true

  gradient_checkpointing: true
examples/pythia/lora.yml CHANGED
@@ -27,7 +27,7 @@ num_epochs: 4
  learning_rate: 0.00001
  train_on_inputs: false
  group_by_length: false
- bf16: true
+ bf16: auto
  tf32: true
  early_stopping_patience:
  resume_from_checkpoint:
examples/qwen/lora.yml CHANGED
@@ -43,8 +43,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: false
examples/qwen/qlora.yml CHANGED
@@ -43,8 +43,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: false
examples/redpajama/config-3b.yml CHANGED
@@ -34,7 +34,7 @@ lr_scheduler: cosine
  learning_rate: 0.0000002
  train_on_inputs: false
  group_by_length: false
- bf16: true
+ bf16: auto
  tf32: true
  early_stopping_patience:
  resume_from_checkpoint:
examples/replit-3b/config-lora.yml CHANGED
@@ -33,7 +33,7 @@ lr_scheduler:
  learning_rate: 0.00001
  train_on_inputs: false
  group_by_length: false
- bf16: true
+ bf16: auto
  tf32: true
  gradient_checkpointing:
  early_stopping_patience:
examples/tiny-llama/lora.yml CHANGED
@@ -41,8 +41,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/tiny-llama/pretrain.yml CHANGED
@@ -34,8 +34,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/tiny-llama/qlora.yml CHANGED
@@ -43,8 +43,8 @@ learning_rate: 0.0002

  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false

  gradient_checkpointing: true
examples/xgen-7b/xgen-7b-8k-qlora.yml CHANGED
@@ -62,8 +62,8 @@ lr_scheduler: cosine
  learning_rate: 0.00002
  train_on_inputs: false
  group_by_length: false
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false
  gradient_checkpointing: true
  # stop training after this many evaluation losses have increased in a row
examples/yi-34B-chat/qlora.yml CHANGED
@@ -7,8 +7,8 @@ load_in_8bit: false
  load_in_4bit: true
  strict: false
  sequence_len: 1024
- bf16: true
- fp16: false
+ bf16: auto
+ fp16:
  tf32: false
  flash_attention: true
  special_tokens:
src/axolotl/utils/config.py CHANGED
@@ -70,6 +70,8 @@ def normalize_config(cfg):
          else:
              LOG.debug("bf16 support not detected, disabling for this configuration.")
              cfg.bf16 = False
+             if cfg.fp16 is None:
+                 cfg.fp16 = True

      if cfg.device == "mps":
          cfg.load_in_8bit = False
@@ -79,6 +81,8 @@ def normalize_config(cfg):
          cfg.bf16 = False
      else:
          torch.backends.cuda.matmul.allow_tf32 = cfg.tf32 or False
+         if cfg.bf16:
+             cfg.fp16 = False

      if cfg.bf16 or cfg.bfloat16:
          cfg.torch_dtype = torch.bfloat16
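Read together, the two hunks enforce the same rules as the sketch near the top of this commit, but inside `normalize_config`. As a self-contained reading aid (a `SimpleNamespace` stands in for axolotl's config object, and `bf16_available`/`device` stand in for the runtime checks; this is an approximation, not the full function):

```python
from types import SimpleNamespace

def normalize_precision(cfg, bf16_available, device="cuda"):
    # First hunk: when bf16 was 'auto' but unsupported, an unset fp16 falls back to True.
    if cfg.bf16 == "auto":
        if bf16_available:
            cfg.bf16 = True
        else:
            cfg.bf16 = False
            if cfg.fp16 is None:
                cfg.fp16 = True
    # Second hunk: on non-mps devices, an enabled bf16 disables fp16 outright.
    if device == "mps":
        cfg.bf16 = False
    else:
        if cfg.bf16:
            cfg.fp16 = False
    return cfg

cfg = normalize_precision(SimpleNamespace(bf16="auto", fp16=None), bf16_available=False)
print(cfg.bf16, cfg.fp16)  # False True -> the trainer falls back to fp16 AMP
```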
tests/test_normalize_config.py CHANGED
@@ -78,13 +78,28 @@ class NormalizeConfigTestCase(unittest.TestCase):
          normalize_config(cfg)

          self.assertTrue(cfg.bf16)
+         self.assertFalse(cfg.fp16)

      @patch("axolotl.utils.config.is_torch_bf16_gpu_available")
      def test_bf16_auto_setter_not_available(self, mock_bf16_avail):
          cfg = self._get_base_cfg()
          cfg.bf16 = "auto"
+         cfg.fp16 = None
          mock_bf16_avail.return_value = False

          normalize_config(cfg)

          self.assertFalse(cfg.bf16)
+         self.assertTrue(cfg.fp16)
+
+     @patch("axolotl.utils.config.is_torch_bf16_gpu_available")
+     def test_bf16_disables_fp16(self, mock_bf16_avail):
+         cfg = self._get_base_cfg()
+         cfg.bf16 = True
+         cfg.fp16 = False
+         mock_bf16_avail.return_value = True
+
+         normalize_config(cfg)
+
+         self.assertTrue(cfg.bf16)
+         self.assertFalse(cfg.fp16)