gracefully handle empty input (#442)
Browse files
src/axolotl/prompt_tokenizers.py
CHANGED
@@ -85,7 +85,11 @@ class PromptTokenizingStrategy(abc.ABC):
|
|
85 |
result["input_ids"].append(self.tokenizer.eos_token_id)
|
86 |
result["attention_mask"].append(1)
|
87 |
|
88 |
-
if result["input_ids"][0] == self.tokenizer.bos_token_id and strip_bos_token:
|
89 |
result["input_ids"] = result["input_ids"][1:]
|
90 |
result["attention_mask"] = result["attention_mask"][1:]
|
91 |
|
|
|
85 |
result["input_ids"].append(self.tokenizer.eos_token_id)
|
86 |
result["attention_mask"].append(1)
|
87 |
|
88 |
+
if (
|
89 |
+
len(result["input_ids"]) > 0
|
90 |
+
and result["input_ids"][0] == self.tokenizer.bos_token_id
|
91 |
+
and strip_bos_token
|
92 |
+
):
|
93 |
result["input_ids"] = result["input_ids"][1:]
|
94 |
result["attention_mask"] = result["attention_mask"][1:]
|
95 |
|