distributed-optimized-gpt2-1b-bnb-8bit-smashed / configuration_gpt_optimized.py
from transformers import GPT2Config

class GPTOptimConfig(GPT2Config):
    """Configuration for the optimized GPT-2 variant.

    Extends GPT2Config with an explicit block_size field; the remaining
    fields override the GPT2Config defaults after super().__init__().
    """

    model_type = "gpt_optimized"

    def __init__(
        self,
        block_size: int = 1024,  # max sequence length
        vocab_size: int = 50257,  # number of tokens: 50,000 BPE merges + 256 byte tokens + 1 <|endoftext|> token
        n_layer: int = 16,  # number of layers
        n_head: int = 16,  # number of attention heads
        n_embd: int = 1024,  # embedding dimension
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Assigned after super().__init__() so these values replace the
        # GPT2Config defaults (e.g. n_layer=12, n_embd=768).
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
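

# --- Minimal usage sketch (an illustration, not part of the uploaded file):
# instantiate the config directly, and optionally register it so AutoConfig
# can resolve the "gpt_optimized" model_type to this class. ---
if __name__ == "__main__":
    from transformers import AutoConfig

    config = GPTOptimConfig(n_layer=16, n_head=16, n_embd=1024)
    print(config.model_type)  # "gpt_optimized"
    print(config.block_size)  # 1024

    # Registration maps the custom model_type string to this class so that
    # AutoConfig can resolve configs that declare model_type="gpt_optimized".
    AutoConfig.register("gpt_optimized", GPTOptimConfig)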