import torch
from torch import nn

from transformers import BertPreTrainedModel


class ParagramSPModel(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        # A single embedding table; sentence vectors are produced later by mean-pooling token embeddings.
        self.word_embeddings = nn.Embedding(
            config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id
        )

        self.post_init()

    def filter_input_ids(self, input_ids):
        """Drop non-positive token ids from each row, then re-pad to the original length."""
        output = []
        length = input_ids.shape[1]
        for i in range(input_ids.shape[0]):
            ids = input_ids[i]
            filtered_ids = [j.item() for j in ids if j > 0]
            if len(filtered_ids) == 0:
                # Keep at least one token so the row is never empty.
                filtered_ids = [0]
            output.append(filtered_ids + [self.config.pad_token_id] * (length - len(filtered_ids)))
        return torch.tensor(output, device=input_ids.device)

    def forward(self, input_ids, attention_mask):
        # The provided attention_mask is not used directly; it is recomputed after filtering.
        input_ids = self.filter_input_ids(input_ids)
        attention_mask = (input_ids != self.config.pad_token_id).float()

        embeddings = self.word_embeddings(input_ids)
        masked_embeddings = embeddings * attention_mask[:, :, None]
        # Mean-pool over non-padding positions; the clamp guards against division by zero
        # when a row contains nothing but padding.
        mean_pooled_embeddings = masked_embeddings.sum(dim=1) / attention_mask[:, :, None].sum(dim=1).clamp(min=1e-9)

        return (embeddings, mean_pooled_embeddings, embeddings)
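

# A minimal usage sketch, not part of the original model code: the config values and
# example token ids below are illustrative assumptions, chosen only to exercise the forward pass.
if __name__ == "__main__":
    from transformers import BertConfig

    config = BertConfig(vocab_size=30522, hidden_size=768, pad_token_id=0)
    model = ParagramSPModel(config)

    # Two toy sequences padded to length 5 with pad_token_id (0).
    input_ids = torch.tensor([[101, 2023, 2003, 102, 0],
                              [101, 102, 0, 0, 0]])
    attention_mask = (input_ids != config.pad_token_id).long()

    token_embeddings, sentence_embeddings, _ = model(input_ids, attention_mask)
    print(token_embeddings.shape)     # torch.Size([2, 5, 768])
    print(sentence_embeddings.shape)  # torch.Size([2, 768])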