Update preprocessing_molmo.py (#9)
Browse files
- Update preprocessing_molmo.py (c09a1204d30e25f1cb5336cec0264b46bd5900c5)
- preprocessing_molmo.py +9 -6
preprocessing_molmo.py
CHANGED
|
@@ -23,7 +23,7 @@ from transformers.processing_utils import (
|
|
| 23 |
ProcessorMixin,
|
| 24 |
)
|
| 25 |
|
| 26 |
-
from transformers.tokenization_utils_base import TextInput
|
| 27 |
from transformers.utils import logging
|
| 28 |
|
| 29 |
from transformers import AutoTokenizer
|
|
@@ -116,6 +116,8 @@ class MolmoProcessor(ProcessorMixin):
|
|
| 116 |
self,
|
| 117 |
text: TextInput = None,
|
| 118 |
images: ImageInput = None,
|
|
|
|
|
|
|
| 119 |
**kwargs: Unpack[MolmoProcessorKwargs],
|
| 120 |
):
|
| 121 |
output_kwargs = self._merge_kwargs(
|
|
@@ -124,11 +126,12 @@ class MolmoProcessor(ProcessorMixin):
|
|
| 124 |
**kwargs,
|
| 125 |
)
|
| 126 |
|
| 127 |
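-
tokens = self.get_tokens_input(
|
| 128 |
-
text,
|
| 129 |
-
output_kwargs["text_kwargs"]["message_format"],
|
| 130 |
-
output_kwargs["text_kwargs"]["always_start_with_space"],
|
| 131 |
-
)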
|
|
|
|
| 132 |
|
| 133 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
| 134 |
|
|
|
|
| 23 |
ProcessorMixin,
|
| 24 |
)
|
| 25 |
|
| 26 |
+
from transformers.tokenization_utils_base import TextInput, PreTokenizedInput
|
| 27 |
from transformers.utils import logging
|
| 28 |
|
| 29 |
from transformers import AutoTokenizer
|
|
|
|
| 116 |
self,
|
| 117 |
text: TextInput = None,
|
| 118 |
images: ImageInput = None,
|
| 119 |
+
*,
|
| 120 |
+
tokens: Optional[PreTokenizedInput] = None,
|
| 121 |
**kwargs: Unpack[MolmoProcessorKwargs],
|
| 122 |
):
|
| 123 |
output_kwargs = self._merge_kwargs(
|
|
|
|
| 126 |
**kwargs,
|
| 127 |
)
|
| 128 |
|
| 129 |
+
if tokens is None:
|
| 130 |
+
tokens = self.get_tokens_input(
|
| 131 |
+
text,
|
| 132 |
+
output_kwargs["text_kwargs"]["message_format"],
|
| 133 |
+
output_kwargs["text_kwargs"]["always_start_with_space"],
|
| 134 |
+
)
|
| 135 |
|
| 136 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
| 137 |
|