winglian committed
Commit: bdbca8f
1 Parent(s): 42410c7

more fixes

scripts/finetune.py CHANGED
@@ -1,7 +1,6 @@
 import importlib
 import logging
 import os
-import pathlib
 import random
 import signal
 import sys
@@ -10,7 +9,6 @@ from typing import Optional
 
 import fire
 import torch
-import transformers
 import yaml
 from attrdict import AttrDefault
 
@@ -236,7 +234,9 @@ def train(
     logging.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}")
 
     # TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading
-    model.save_pretrained(cfg.output_dir)
+    # only save on rank 0, otherwise it corrupts output on multi-GPU when multiple processes attempt to write the same file
+    if cfg.local_rank == 0:
+        model.save_pretrained(cfg.output_dir)
     # trainer.save_model(cfg.output_dir) # TODO this may be needed for deepspeed to work? need to review another time
 
 
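The finetune.py hunk guards the final save behind cfg.local_rank so that only the main process writes the checkpoint. Below is a minimal standalone sketch of the same rank-0-only save pattern, assuming a torchrun/accelerate-style launch that exports LOCAL_RANK; the "gpt2" checkpoint and the "out/demo" directory are placeholders, not values from this commit.

import os

from transformers import AutoModelForCausalLM

# Determine this worker's rank; distributed launchers export LOCAL_RANK,
# and a plain single-process run falls back to 0.
local_rank = int(os.environ.get("LOCAL_RANK", "0"))

# Placeholder model, purely for illustration.
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Only rank 0 writes to disk; if every process called save_pretrained on
# the same directory, they would race on the same files and could corrupt
# the saved checkpoint.
if local_rank == 0:
    model.save_pretrained("out/demo")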
src/axolotl/utils/models.py CHANGED
@@ -1,4 +1,5 @@
 import logging
+import math
 import os
 from pathlib import Path
 from typing import Optional, Tuple, TYPE_CHECKING
@@ -180,12 +181,14 @@ def load_model(
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
         os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-    if cfg.tokens:
-        for k, v in cfg.tokens.items():
+    if cfg.special_tokens:
+        for k, v in cfg.special_tokens.items():
             tokenizer.add_special_tokens({k: v})
+    if cfg.tokens:
+        tokenizer.add_tokens(cfg.tokens)
 
-    # this should only be needed if you are messing with new tokens in the vocab
-    # model.resize_token_embeddings(len(tokenizer))
+    embeddings_len = math.ceil(len(tokenizer) / 32) * 32
+    model.resize_token_embeddings(embeddings_len)
 
     if cfg.adapter and load_in_8bit and not cfg.load_4bit:
         logging.info("converting PEFT model w/ prepare_model_for_int8_training")
@@ -221,6 +224,7 @@ def load_model(
             requires_grad.append(f"{name}: {param.requires_grad}")
     if len(requires_grad) == 0:
         logging.warning("there are no parameters that require gradient updates")
+    model.config.use_cache = False
 
     # TODO resume_from_checkpoint handling
     return model, tokenizer, lora_config
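
The models.py hunks split token handling into cfg.special_tokens (special-token roles such as a pad token) and cfg.tokens (plain extra vocabulary), resize the embedding matrix to the next multiple of 32, and turn off the KV cache for training. A minimal sketch of that flow against a stock Hugging Face causal LM; the "gpt2" checkpoint and the example token strings are placeholders, not taken from this commit.

import math

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Stand-ins for cfg.special_tokens (mapping of special-token roles to strings)
# and cfg.tokens (a list of ordinary new vocabulary entries).
special_tokens = {"pad_token": "[PAD]"}
tokens = ["<|user|>", "<|assistant|>"]

for k, v in special_tokens.items():
    tokenizer.add_special_tokens({k: v})
tokenizer.add_tokens(tokens)

# Resize the embedding matrix so the new tokens get rows; rounding the size
# up to a multiple of 32 is a common alignment trick for GPU throughput.
embeddings_len = math.ceil(len(tokenizer) / 32) * 32
model.resize_token_embeddings(embeddings_len)

# Mirror the diff's use_cache = False: the KV cache is a generation-time
# optimization and is normally disabled while training.
model.config.use_cache = False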