class PositionalEncoding(nn.Module): def __init__(self, d_model, dropout=0.1, max_len=5000): super(PositionalEncoding, self).__init__() self.dropout = nn.Dropout(p=dropout) pe = torch.zeros(max_len, d_model) # [max_len, d_model] position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) # [max_len, 1] div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) # [d_model/2] pe[:, 0::2] = torch.sin(position * div_term) # [max_len, d_model/2] pe[:, 1::2] = torch.cos(position * div_term) pe = pe.unsqueeze(0).transpose(0, 1) # 1,51,512 --> [51, 1, d_model] self.register_buffer('pe', pe) def forward(self, x): """ :param x: [x_len, batch_size, emb_size] :return: [x_len, batch_size, emb_size] """ x = x + self.pe[:x.size(0), :] # [x_len, batch_size, d_model] return self.dropout(x)