from transformers import PretrainedConfig


class LstmConfig(PretrainedConfig):
    """Configuration for an LSTM-based causal language model.

    Holds the architecture hyperparameters (layer count, hidden/intermediate
    sizes, vocabulary size), initializer settings, and the special token ids,
    delegating token-id and embedding-tying bookkeeping to
    ``PretrainedConfig.__init__``.

    Args:
        num_hidden_layers (int): Number of stacked LSTM layers.
        hidden_size (int): Dimensionality of the hidden state.
        vocab_size (int): Size of the token vocabulary.
        intermediate_size (int): Dimensionality of the intermediate
            (feed-forward) projection.
        pad_token_id (int): Id of the padding token.
        bos_token_id (int): Id of the beginning-of-sequence token.
        eos_token_id (int): Id of the end-of-sequence token.
        initializer_range (float): Std-dev for weight initialization.
        initializer_gain (float): Gain factor for weight initialization.
        tie_word_embeddings (bool): Whether input and output embeddings
            share weights.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "lstm"

    def __init__(
        self,
        num_hidden_layers=16,
        hidden_size=640,
        vocab_size=128256,
        intermediate_size=2560,
        pad_token_id=128004,
        bos_token_id=128000,
        eos_token_id=128001,
        initializer_range=0.02,
        initializer_gain=1,
        tie_word_embeddings=True,
        **kwargs,
    ):
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.vocab_size = vocab_size
        self.intermediate_size = intermediate_size
        self.initializer_range = initializer_range
        self.initializer_gain = initializer_gain
        # NOTE: tie_word_embeddings is intentionally NOT assigned here.
        # PretrainedConfig.__init__ pops it from kwargs and sets it on the
        # instance itself, same as the special token ids below — assigning it
        # beforehand (as the original did) was redundant.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )