winglian committed on
Commit
9d629d8
1 Parent(s): d2e7f27

gracefully handle empty input (#442)

Browse files
Files changed (1) hide show
  1. src/axolotl/prompt_tokenizers.py +5 -1
src/axolotl/prompt_tokenizers.py CHANGED
@@ -85,7 +85,11 @@ class PromptTokenizingStrategy(abc.ABC):
85
  result["input_ids"].append(self.tokenizer.eos_token_id)
86
  result["attention_mask"].append(1)
87
 
88
- if result["input_ids"][0] == self.tokenizer.bos_token_id and strip_bos_token:
 
 
 
 
89
  result["input_ids"] = result["input_ids"][1:]
90
  result["attention_mask"] = result["attention_mask"][1:]
91
 
 
85
  result["input_ids"].append(self.tokenizer.eos_token_id)
86
  result["attention_mask"].append(1)
87
 
88
+ if (
89
+ len(result["input_ids"]) > 0
90
+ and result["input_ids"][0] == self.tokenizer.bos_token_id
91
+ and strip_bos_token
92
+ ):
93
  result["input_ids"] = result["input_ids"][1:]
94
  result["attention_mask"] = result["attention_mask"][1:]
95