File size: 1,644 Bytes
c8679a5 1e3a833 3fc7bdb c8679a5 13b32c9 3fc7bdb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
This model used hfl/chinese-roberta-wwm-ext-large backbone and was trained on SNLI, MNLI, DNLI, KvPI data in Chinese version.
Model structures are as follows:
```
class RobertaForSequenceClassification(nn.Module):
def __init__(self, tagset_size):
super(RobertaForSequenceClassification, self).__init__()
self.tagset_size = tagset_size
self.roberta_single= AutoModel.from_pretrained(pretrain_model_dir)
self.single_hidden2tag = RobertaClassificationHead(bert_hidden_dim, tagset_size)
def forward(self, input_ids, input_mask):
outputs_single = self.roberta_single(input_ids, input_mask, None)
hidden_states_single = outputs_single[1]#torch.tanh(self.hidden_layer_2(torch.tanh(self.hidden_layer_1(outputs_single[1])))) #(batch, hidden)
score_single = self.single_hidden2tag(hidden_states_single) #(batch, tag_set)
return score_single
class RobertaClassificationHead(nn.Module):
def __init__(self, bert_hidden_dim, num_labels):
super(RobertaClassificationHead, self).__init__()
self.dense = nn.Linear(bert_hidden_dim, bert_hidden_dim)
self.dropout = nn.Dropout(0.1)
self.out_proj = nn.Linear(bert_hidden_dim, num_labels)
def forward(self, features):
x = features#[:, 0, :] # take <s> token (equiv. to [CLS])
x = self.dropout(x)
x = self.dense(x)
x = torch.tanh(x)
x = self.dropout(x)
x = self.out_proj(x)
return x
model = RobertaForSequenceClassification(num_labels)
model.load_state_dict(torch.load(args.model_save_path+'Roberta_large_model.pt', map_location=device))``` |