# GPT-2V / preprocessing.py
# Hugging Face Hub copy: commit e4c7225 ("Create preprocessing.py") by yuchenxie, verified.
from transformers import GPT2Tokenizer
class ArlowGPTPreprocessor:
    """Data preprocessor for the ArlowGPT model.

    Wraps a tokenizer and turns raw text into padded tensors.
    """

    def __init__(self, tokenizer: "GPT2Tokenizer"):
        """Store the tokenizer, making sure it is able to pad.

        Args:
            tokenizer: Tokenizer used to encode text. If it has no padding
                token, its end-of-sequence token is assigned as the padding
                token (the tokenizer object is modified in place).
        """
        # GPT-2 tokenizers ship without a pad token, and tokenizing with
        # ``padding=True`` raises ValueError in that case. Reusing EOS is the
        # usual workaround; a pad token the caller already set is left alone.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        self.tokenizer = tokenizer

    def preprocess_text(self, text: str):
        """Tokenize *text* with padding enabled.

        Args:
            text: The text to encode.

        Returns:
            Whatever the tokenizer call returns for
            ``return_tensors="pt", padding=True``.
        """
        return self.tokenizer(text, return_tensors="pt", padding=True)