winglian committed on
Commit
2a428e8
1 Parent(s): 06c61d6

better handling since xgen tokenizer breaks with convert_tokens_to_ids

Browse files
Files changed (1) hide show
  1. src/axolotl/prompt_tokenizers.py +12 -6
src/axolotl/prompt_tokenizers.py CHANGED
@@ -48,16 +48,22 @@ class PromptTokenizingStrategy(abc.ABC):
48
 
49
  @functools.lru_cache(maxsize=128)
50
  def _get_user_token(self):
51
- id_or_ids = self.tokenizer.convert_tokens_to_ids("<|USER|>")
52
- if isinstance(id_or_ids, (int,)):
53
- return id_or_ids
 
 
 
54
  return False
55
 
56
  @functools.lru_cache(maxsize=128)
57
  def _get_assistant_token(self):
58
- id_or_ids = self.tokenizer.convert_tokens_to_ids("<|ASSISTANT|>")
59
- if isinstance(id_or_ids, (int,)):
60
- return id_or_ids
 
 
 
61
  return False
62
 
63
  def _tokenize(self, prompt: str, add_eos_token=True, strip_bos_token=False):
 
48
 
49
  @functools.lru_cache(maxsize=128)
50
  def _get_user_token(self):
51
+ try:
52
+ id_or_ids = self.tokenizer.convert_tokens_to_ids("<|USER|>")
53
+ if isinstance(id_or_ids, (int,)):
54
+ return id_or_ids
55
+ except KeyError:
56
+ pass
57
  return False
58
 
59
  @functools.lru_cache(maxsize=128)
60
  def _get_assistant_token(self):
61
+ try:
62
+ id_or_ids = self.tokenizer.convert_tokens_to_ids("<|ASSISTANT|>")
63
+ if isinstance(id_or_ids, (int,)):
64
+ return id_or_ids
65
+ except KeyError:
66
+ pass
67
  return False
68
 
69
  def _tokenize(self, prompt: str, add_eos_token=True, strip_bos_token=False):