prepare does all this already for qlora?
src/axolotl/utils/models.py  CHANGED  (+12 -12)
@@ -204,17 +204,17 @@ def load_model(
     """### Post-processing on the model
     Finally, we need to apply some post-processing on the 8-bit model to enable training, let's freeze all our layers, and cast the layer-norm in `float32` for stability. We also cast the output of the last layer in `float32` for the same reasons.
     """
-    if cfg.adapter == "qlora":
-        for param in model.parameters():
-            param.requires_grad = False  # freeze the model - train adapters later
-            if param.ndim == 1:
-                # cast the small parameters (e.g. layernorm) to fp32 for stability
-                param.data = param.data.to(torch.float32)
-        class CastOutputToFloat(nn.Linear):
-            def forward(self, x):
-                return super().forward(x).to(torch.float32)
-
-        model.lm_head = CastOutputToFloat(model.lm_head.in_features, model.lm_head.out_features, model.lm_head.bias)
+    # if cfg.adapter == "qlora":
+    #     for param in model.parameters():
+    #         param.requires_grad = False  # freeze the model - train adapters later
+    #         if param.ndim == 1:
+    #             # cast the small parameters (e.g. layernorm) to fp32 for stability
+    #             param.data = param.data.to(torch.float32)
+    #     class CastOutputToFloat(nn.Linear):
+    #         def forward(self, x):
+    #             return super().forward(x).to(torch.float32)
+    #
+    #     model.lm_head = CastOutputToFloat(model.lm_head.in_features, model.lm_head.out_features, model.lm_head.bias)
 
     if not tokenizer:
         try:
@@ -255,7 +255,7 @@ def load_model(
     embeddings_len = math.ceil(len(tokenizer) / 32) * 32
     model.resize_token_embeddings(embeddings_len)
 
-    if cfg.adapter and load_in_8bit and not cfg.load_4bit:
+    if ((cfg.adapter == "lora" and load_in_8bit) or cfg.adapter == "qlora") and not cfg.load_4bit:
         logging.info("converting PEFT model w/ prepare_model_for_int8_training")
         model = prepare_model_for_int8_training(model)
 
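On the question in the commit message: peft's prepare_model_for_int8_training does cover the steps in the block commented out above. The sketch below is a rough, illustrative approximation of that helper, not the library source; the function name prepare_for_int8_training_sketch is hypothetical, and the real helper also handles gradient checkpointing and may differ in detail between peft versions (for example, it wraps the output head in an nn.Sequential rather than subclassing nn.Linear as the removed code did).

    import torch
    import torch.nn as nn

    def prepare_for_int8_training_sketch(model, output_embedding_layer_name="lm_head"):
        # Approximation of peft's prepare_model_for_int8_training for illustration.
        # It performs the same steps as the removed manual block: freeze every
        # parameter, upcast 1-D (layer-norm/bias) parameters to fp32, and cast
        # the LM head output to fp32.
        for param in model.parameters():
            param.requires_grad = False  # freeze the base model; only adapters train
            if param.ndim == 1:
                # keep small parameters such as layer norms in fp32 for stability
                param.data = param.data.to(torch.float32)

        if hasattr(model, output_embedding_layer_name):
            output_layer = getattr(model, output_embedding_layer_name)

            class CastOutputToFloat(nn.Sequential):
                # wrap the existing head so its logits come out in fp32
                def forward(self, x):
                    return super().forward(x).to(torch.float32)

            setattr(model, output_embedding_layer_name, CastOutputToFloat(output_layer))

        return model

Given that, the second hunk simply routes the qlora adapter through the same prepare_model_for_int8_training call that the 8-bit lora path already uses, still skipping it when cfg.load_4bit is set.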