more fixes
- scripts/finetune.py +3 -3
- src/axolotl/utils/models.py +8 -4
scripts/finetune.py
CHANGED
@@ -1,7 +1,6 @@
 import importlib
 import logging
 import os
-import pathlib
 import random
 import signal
 import sys
@@ -10,7 +9,6 @@ from typing import Optional
 
 import fire
 import torch
-import transformers
 import yaml
 from attrdict import AttrDefault
 
@@ -236,7 +234,9 @@ def train(
     logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
 
     # TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading
-
+    # only save on rank 0, otherwise it corrupts output on multi-GPU when multiple processes attempt to write the same file
+    if cfg.local_rank == 0:
+        model.save_pretrained(cfg.output_dir)
     # trainer.save_model(cfg.output_dir) # TODO this may be needed for deepspeed to work? need to review another time
 
 
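The finetune.py hunk puts the end-of-training save behind a local_rank == 0 guard so that, in multi-GPU runs, only one process writes the checkpoint files. Below is a minimal sketch of that pattern, assuming a Hugging Face-style model object and that cfg.local_rank mirrors the LOCAL_RANK value set by torchrun; the helper name save_final_model and the trailing barrier are illustrative additions, not part of the commit.

import os

import torch.distributed as dist


def save_final_model(model, output_dir: str, local_rank: int) -> None:
    # Only rank 0 writes: concurrent save_pretrained() calls from every
    # process into the same directory can interleave and corrupt the output.
    if local_rank == 0:
        os.makedirs(output_dir, exist_ok=True)
        model.save_pretrained(output_dir)
    # Keep the other ranks from exiting before the files are on disk
    # (an assumption here, not something the commit adds).
    if dist.is_available() and dist.is_initialized():
        dist.barrier()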
src/axolotl/utils/models.py
CHANGED
@@ -1,4 +1,5 @@
 import logging
+import math
 import os
 from pathlib import Path
 from typing import Optional, Tuple, TYPE_CHECKING
@@ -180,12 +181,14 @@ def load_model(
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-    if cfg.
-        for k, v in cfg.
+    if cfg.special_tokens:
+        for k, v in cfg.special_tokens.items():
             tokenizer.add_special_tokens({k: v})
+    if cfg.tokens:
+        tokenizer.add_tokens(cfg.tokens)
 
-
-
+    embeddings_len = math.ceil(len(tokenizer) / 32) * 32
+    model.resize_token_embeddings(embeddings_len)
 
     if cfg.adapter and load_in_8bit and not cfg.load_4bit:
         logging.info("converting PEFT model w/ prepare_model_for_int8_training")
@@ -221,6 +224,7 @@ def load_model(
         requires_grad.append(f"{name}: {param.requires_grad}")
     if len(requires_grad) == 0:
         logging.warning("there are no parameters that require gradient updates")
+    model.config.use_cache = False
 
     # TODO resume_from_checkpoint handling
     return model, tokenizer, lora_config
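The models.py hunk wires two config keys into the tokenizer (cfg.special_tokens as a name-to-value mapping, cfg.tokens as a plain list), pads the embedding table up to the next multiple of 32, and switches off the KV cache for training. Below is a standalone sketch of the same sequence against the plain transformers API; the model id and token values are placeholders, not taken from the commit.

import math

from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder values; in axolotl these come from the YAML config
# (cfg.special_tokens and cfg.tokens).
special_tokens = {"pad_token": "[PAD]"}
extra_tokens = ["<|user|>", "<|assistant|>"]

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Named special tokens go through add_special_tokens one entry at a time,
# mirroring the loop in the diff; plain tokens are appended to the vocab.
for k, v in special_tokens.items():
    tokenizer.add_special_tokens({k: v})
tokenizer.add_tokens(extra_tokens)

# Round the vocab size up to a multiple of 32 before resizing the embedding
# matrix, e.g. 32003 tokens -> 32032 rows.
embeddings_len = math.ceil(len(tokenizer) / 32) * 32
model.resize_token_embeddings(embeddings_len)

# The KV cache only helps at generation time and conflicts with gradient
# checkpointing, so it is disabled for training.
model.config.use_cache = False

Rounding to a multiple of 32 trades a few unused embedding rows for friendlier matrix dimensions; the extra rows are never indexed by the tokenizer.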