Upload preprocessing_molmo.py with huggingface_hub
Browse files- preprocessing_molmo.py +20 -6
preprocessing_molmo.py
CHANGED
@@ -2,9 +2,11 @@
|
|
2 |
Processor class for Molmo.
|
3 |
"""
|
4 |
|
5 |
-
from typing import
|
6 |
|
7 |
-
|
|
|
|
|
8 |
|
9 |
try:
|
10 |
from typing import Unpack
|
@@ -25,7 +27,7 @@ from transformers.tokenization_utils_base import TextInput
|
|
25 |
from transformers.utils import logging
|
26 |
|
27 |
from transformers import AutoTokenizer
|
28 |
-
from .image_preprocessing_molmo import MolmoImagesKwargs,
|
29 |
|
30 |
|
31 |
logger = logging.get_logger(__name__)
|
@@ -81,7 +83,7 @@ class MolmoProcessorKwargs(ProcessingKwargs, total=False):
|
|
81 |
class MolmoProcessor(ProcessorMixin):
|
82 |
attributes = ["image_processor", "tokenizer"]
|
83 |
image_processor_class = "AutoImageProcessor"
|
84 |
-
tokenizer_class = ("
|
85 |
|
86 |
def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
|
87 |
# self.image_processor = image_processor
|
@@ -131,8 +133,20 @@ class MolmoProcessor(ProcessorMixin):
|
|
131 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
132 |
|
133 |
if images is not None:
|
134 |
-
|
135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
# For now only support inserting images at the start
|
137 |
image_idx = [-1]*len(images)
|
138 |
else:
|
|
|
2 |
Processor class for Molmo.
|
3 |
"""
|
4 |
|
5 |
+
from typing import Optional
|
6 |
|
7 |
+
import PIL
|
8 |
+
from PIL import ImageOps
|
9 |
+
from PIL.Image import Image
|
10 |
|
11 |
try:
|
12 |
from typing import Unpack
|
|
|
27 |
from transformers.utils import logging
|
28 |
|
29 |
from transformers import AutoTokenizer
|
30 |
+
from .image_preprocessing_molmo import MolmoImagesKwargs, MolmoImageProcessor
|
31 |
|
32 |
|
33 |
logger = logging.get_logger(__name__)
|
|
|
83 |
class MolmoProcessor(ProcessorMixin):
|
84 |
attributes = ["image_processor", "tokenizer"]
|
85 |
image_processor_class = "AutoImageProcessor"
|
86 |
+
tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
|
87 |
|
88 |
def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
|
89 |
# self.image_processor = image_processor
|
|
|
133 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
134 |
|
135 |
if images is not None:
|
136 |
+
if not isinstance(images, (list, tuple)):
|
137 |
+
images = [images]
|
138 |
+
image_arrays = []
|
139 |
+
for image in images:
|
140 |
+
if isinstance(image, Image):
|
141 |
+
image = image.convert("RGB")
|
142 |
+
# Handle images with EXIF orientation tags, which PIL will ignore by default
|
143 |
+
# https://github.com/python-pillow/Pillow/issues/4703
|
144 |
+
# exif_transpose returns a new, correctly oriented image; rebind `image` so the array built below uses it.
image = ImageOps.exif_transpose(image)
|
145 |
+
image_arrays.append(np.array(image))
|
146 |
+
else:
|
147 |
+
assert len(image.shape) == 3 and image.shape[-1] == 3
|
148 |
+
image_arrays.append(image.astype(np.uint8))
|
149 |
+
images = image_arrays
|
150 |
# For now only support inserting images at the start
|
151 |
image_idx = [-1]*len(images)
|
152 |
else:
|