from transformers import GPT2Tokenizer


class ArlowGPTPreprocessor:
    """Data preprocessor for the ArlowGPT model.

    Wraps a tokenizer and converts raw text into padded PyTorch tensors.
    """

    def __init__(self, tokenizer: GPT2Tokenizer):
        """Store the tokenizer and make sure it is able to pad.

        Args:
            tokenizer: Tokenizer used to encode text. If it has no padding
                token configured, its end-of-sequence token is assigned as
                the padding token. Note that this modifies the tokenizer
                object passed in by the caller.
        """
        self.tokenizer = tokenizer
        # GPT-2 tokenizers ship without a padding token, and calling a
        # tokenizer with padding=True then raises ValueError. Reusing the EOS
        # token is the usual workaround; an already-configured pad token is
        # left untouched.
        if tokenizer.pad_token is None and tokenizer.eos_token is not None:
            tokenizer.pad_token = tokenizer.eos_token

    def preprocess_text(self, text: str):
        """Tokenize ``text`` and return the encoding as PyTorch tensors.

        Args:
            text: The text to encode.

        Returns:
            Whatever the wrapped tokenizer returns when called with
            ``return_tensors="pt"`` and ``padding=True``.
        """
        return self.tokenizer(text, return_tensors="pt", padding=True)