import torch
from transformers import PretrainedConfig


class ImpressoConfig(PretrainedConfig):
    """Configuration for the ``stacked_bert`` model type.

    Stores the encoder hyperparameters passed to the constructor (their
    names and defaults follow the usual BERT configuration layout) together
    with three extra values that are kept as given: ``pretrained_config``,
    ``label_map`` and ``values_override``.

    Args:
        vocab_size: Size of the token vocabulary; also the exclusive upper
            bound for the random ids made by ``generate_dummy_inputs``.
        hidden_size: Hidden dimension; also used as the last dimension of
            the ``"logits"`` entry in ``self.outputs``.
        num_hidden_layers: Stored as-is.
        num_attention_heads: Stored as-is.
        intermediate_size: Stored as-is.
        hidden_act: Stored as-is.
        hidden_dropout_prob: Stored as-is.
        attention_probs_dropout_prob: Stored as-is.
        max_position_embeddings: Stored as-is.
        type_vocab_size: Stored as-is.
        initializer_range: Stored as-is.
        layer_norm_eps: Stored as-is.
        pad_token_id: Forwarded to ``PretrainedConfig.__init__``.
        position_embedding_type: Stored as-is.
        use_cache: Stored as-is.
        classifier_dropout: Stored as-is.
        pretrained_config: Stored as-is.
        values_override: Stored as-is; any falsy value (including ``None``)
            is replaced by a new empty dict.
        label_map: Stored as-is.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "stacked_bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        pretrained_config=None,
        values_override=None,
        label_map=None,
        **kwargs,
    ):
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
        self.classifier_dropout = classifier_dropout
        self.pretrained_config = pretrained_config
        self.label_map = label_map
        # ``or {}`` gives every instance its own dict when nothing (or any
        # falsy value) is supplied, so no mutable default is shared.
        self.values_override = values_override or {}
        # Description of the model output. The two ``None`` entries are
        # dimensions left unspecified here -- presumably batch and sequence
        # length; verify against whatever consumes this mapping.
        self.outputs = {
            "logits": {"shape": [None, None, self.hidden_size], "dtype": "float32"}
        }

    @classmethod
    def is_torch_support_available(cls):
        """Indicate whether Torch support is available for this configuration.

        Required for compatibility with certain parts of the Transformers
        library.

        Returns:
            Always ``True``.
        """
        return True

    @classmethod
    def patch_ops(cls):
        """No-op hook kept for compatibility with Hugging Face utilities.

        Some utilities call this method to let a configuration patch
        operator mappings. This implementation patches nothing: it accepts
        no arguments and returns ``None``.
        """
        return None

    def generate_dummy_inputs(self, tokenizer, batch_size=1, seq_length=8, framework="pt"):
        """Generate dummy inputs for testing or export.

        Args:
            tokenizer: Accepted for interface compatibility; this
                implementation does not use it.
            batch_size: Number of input samples in the batch.
            seq_length: Length of each sequence.
            framework: Only ``"pt"`` (PyTorch) is supported.

        Returns:
            A dict with two ``torch.long`` tensors of shape
            ``(batch_size, seq_length)``: ``"input_ids"`` holding random ids
            drawn from ``[0, vocab_size)`` and ``"attention_mask"`` filled
            with ones.

        Raises:
            ValueError: If ``framework`` is anything other than ``"pt"``.
        """
        if framework != "pt":
            raise ValueError(f"Framework '{framework}' not supported.")

        shape = (batch_size, seq_length)
        input_ids = torch.randint(
            low=0, high=self.vocab_size, size=shape, dtype=torch.long
        )
        attention_mask = torch.ones(shape, dtype=torch.long)
        return {"input_ids": input_ids, "attention_mask": attention_mask}


# Register the configuration with the transformers library
ImpressoConfig.register_for_auto_class()